We look for genes associated with fibroblast activation in several single-cell datasets.
suppressPackageStartupMessages({
# install.packages("BiocManager")
# install.packages("kableExtra")
requireNamespace("kableExtra")
# ?
# devtools::install_github("rstudio/crosstalk")
# library("crosstalk")
# library("htmlwidgets")
# install.packages("crunch")
# requireNamespace("crunch")
# https://rstudio.github.io/DT/
# install.packages("DT")
requireNamespace("DT")
# install.packages("RCurl")
requireNamespace("RCurl")
# install.packages("dplyr")
library(dplyr)
# install.packages("ggplot2")
library(ggplot2)
# install.packages("ggsci")
requireNamespace("ggsci")
# install.packages("ggtext")
requireNamespace("ggtext")
# install.packages("ggrepel")
requireNamespace("ggrepel")
# install.packages("assertthat")
requireNamespace("assertthat")
#
requireNamespace("magrittr")
`%<>%` <- magrittr::`%<>%`
`%>%` <- magrittr::`%>%`
# install.packages("glue")
requireNamespace("glue")
glue <- glue::glue
# install.packages("tidyr")
requireNamespace("tidyr")
# BiocManager::install("org.Mm.eg.db")
# BiocManager::install("org.Hs.eg.db")
requireNamespace("org.Mm.eg.db")
requireNamespace("org.Hs.eg.db")
# https://www.bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
# BiocManager::install("goseq")
# requireNamespace("goseq")
# install.packages("pracma")
requireNamespace("pracma")
# https://vroom.r-lib.org/articles/vroom.html
# install.packages("vroom")
requireNamespace("vroom")
# BiocManager::install("DESeq2")
requireNamespace("DESeq2")
# BiocManager::install("scater")
requireNamespace("scater")
# BiocManager::install("scran")
# requireNamespace("scran")
# BiocManager::install("igraph")
requireNamespace("igraph")
# install.packages("pheatmap")
requireNamespace("pheatmap")
# install.packages("gridExtra")
requireNamespace("gridExtra")
# install.packages("pathlibr")
requireNamespace("pathlibr")
# install.packages("stringr")
requireNamespace("stringr")
# install.packages("reshape2")
requireNamespace("reshape2")
# install.packages("pbapply")
# requireNamespace("pbapply")
# install.packages("Rtsne")
requireNamespace("Rtsne")
# install.packages("uwot")
requireNamespace("uwot")
# avoid importing `exprs` that leads to clashes
requireNamespace("rlang")
# install.packages("Seurat")
# requireNamespace("Seurat")
# BiocManager::install("TrajectoryUtils") # Didn't work
# devtools::install_github("LTLA/TrajectoryUtils@e943606")
# BiocManager::install("kstreet13/slingshot@25fc566")
# requireNamespace("slingshot")
# BiocManager::install("tradeSeq")
# requireNamespace("tradeSeq")
# install.packages("statmod")
# BiocManager::install("edgeR")
requireNamespace("edgeR")
# https://github.com/eliocamp/ggnewscale
# install.packages("ggnewscale")
# requireNamespace("ggnewscale")
# https://www.rdocumentation.org/packages/DGCA/versions/1.0.2
# BiocManager::install("impute")
# BiocManager::install("preprocessCore")
# BiocManager::install("GO.db")
# install.packages("WGCNA")
# install.packages("DGCA")
requireNamespace("DGCA")
# https://github.com/danielschw188/Revelio
# devtools::install_github('danielschw188/Revelio@e8e1492')
requireNamespace("Revelio")
# # install.packages("rgl")
# requireNamespace("rgl")
# install.packages("plotly")
requireNamespace("plotly")
# https://www.rdocumentation.org/packages/Hmisc/versions/4.5-0/topics/rcorr
# install.packages("Hmisc")
requireNamespace("Hmisc")
# BiocManager::install("multiGSEA")
# requireNamespace("multiGSEA")
# install.packages("tidygraph")
# install.packages("ggraph")
requireNamespace("ggraph")
requireNamespace("tidygraph")
# https://htmlpreview.github.io/?https://github.com/aertslab/SCENIC/blob/master/inst/doc/SCENIC_Setup.html
# BiocManager::install(c("AUCell", "RcisTarget"))
# BiocManager::install(c("GRNBoost"))
# BiocManager::install(c("doMC", "doRNG"))
# devtools::install_github("aertslab/SCENIC@0585e87")
# requireNamespace("SCENIC")
})
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/glue/libs/glue.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/xml2/libs/xml2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/colorspace/libs/colorspace.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/systemfonts/libs/systemfonts.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/svglite/libs/svglite.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bitops/libs/bitops.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RCurl/libs/RCurl.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vctrs/libs/vctrs.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ellipsis/libs/ellipsis.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fansi/libs/fansi.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/utf8/libs/utf8.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tibble/libs/tibble.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/purrr/libs/purrr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/dplyr/libs/dplyr.so") ...
## now dyn.load("/usr/lib/R/library/grid/libs/grid.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/libs/Rcpp.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/gridtext/libs/gridtext.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ggrepel/libs/ggrepel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidyr/libs/tidyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit/libs/bit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit64/libs/bit64.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastmap/libs/fastmap.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/cachem/libs/cachem.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSQLite/libs/RSQLite.so") ...
## now dyn.load("/usr/lib/R/library/parallel/libs/parallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biobase/libs/Biobase.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/S4Vectors/libs/S4Vectors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/IRanges/libs/IRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/png/libs/png.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XVector/libs/XVector.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biostrings/libs/Biostrings.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vroom/libs/vroom.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocParallel/libs/BiocParallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/GenomicRanges/libs/GenomicRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/matrixStats/libs/matrixStats.so") ...
## now dyn.load("/usr/lib/R/library/lattice/libs/lattice.so") ...
## now dyn.load("/usr/lib/R/library/Matrix/libs/Matrix.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DelayedArray/libs/DelayedArray.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XML/libs/XML.so") ...
## now dyn.load("/usr/lib/R/library/splines/libs/splines.so") ...
## now dyn.load("/usr/lib/R/library/survival/libs/survival.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/genefilter/libs/genefilter.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/locfit/libs/locfit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DESeq2/libs/DESeq2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/sparseMatrixStats/libs/sparseMatrixStats.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beachmat/libs/beachmat.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/scuttle/libs/scuttle.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocNeighbors/libs/BiocNeighbors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/irlba/libs/irlba.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocSingular/libs/BiocSingular.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/igraph/libs/igraph.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/plyr/libs/plyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/reshape2/libs/reshape2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rtsne/libs/Rtsne.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/uwot/libs/uwot.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/limma/libs/limma.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/edgeR/libs/edgeR.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastcluster/libs/fastcluster.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/base64enc/libs/base64enc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/jpeg/libs/jpeg.so") ...
## now dyn.load("/usr/lib/R/library/cluster/libs/cluster.so") ...
## now dyn.load("/usr/lib/R/library/foreign/libs/foreign.so") ...
## now dyn.load("/usr/lib/R/library/nnet/libs/nnet.so") ...
## now dyn.load("/usr/lib/R/library/rpart/libs/rpart.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/checkmate/libs/checkmate.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/backports/libs/backports.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/data.table/libs/datatable.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Hmisc/libs/Hmisc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/impute/libs/impute.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/preprocessCore/libs/preprocessCore.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/WGCNA/libs/WGCNA.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/lazyeval/libs/lazyeval.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidygraph/libs/tidygraph.so") ...
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=lzh_TW
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=lzh_TW LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=lzh_TW LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=lzh_TW LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 Hmisc_4.5-0
## [3] systemfonts_1.0.2 plyr_1.8.6
## [5] igraph_1.2.6 lazyeval_0.2.2
## [7] splines_4.1.0 Revelio_0.1.0
## [9] BiocParallel_1.26.0 GenomeInfoDb_1.28.0
## [11] scater_1.20.0 digest_0.6.27
## [13] foreach_1.5.1 htmltools_0.5.1.1
## [15] viridis_0.6.1 GO.db_3.13.0
## [17] fansi_0.5.0 checkmate_2.0.0
## [19] magrittr_2.0.1 memoise_2.0.0
## [21] ScaledMatrix_1.0.0 cluster_2.1.2
## [23] doParallel_1.0.16 limma_3.48.0
## [25] fastcluster_1.2.3 Biostrings_2.60.1
## [27] annotate_1.70.0 matrixStats_0.59.0
## [29] vroom_1.4.0 svglite_2.0.0
## [31] jpeg_0.1-8.1 colorspace_2.0-2
## [33] blob_1.2.1 rvest_1.0.0
## [35] ggrepel_0.9.1 xfun_0.23
## [37] crayon_1.4.1 RCurl_1.98-1.3
## [39] jsonlite_1.7.2 org.Mm.eg.db_3.13.0
## [41] genefilter_1.74.0 impute_1.66.0
## [43] survival_3.2-11 iterators_1.0.13
## [45] glue_1.4.2 kableExtra_1.3.4
## [47] gtable_0.3.0 zlibbioc_1.38.0
## [49] XVector_0.32.0 webshot_0.5.2
## [51] DelayedArray_0.18.0 BiocSingular_1.8.0
## [53] SingleCellExperiment_1.14.1 BiocGenerics_0.38.0
## [55] scales_1.1.1 pheatmap_1.0.12
## [57] DBI_1.1.1 edgeR_3.34.0
## [59] Rcpp_1.0.6 htmlTable_2.2.1
## [61] viridisLite_0.4.0 xtable_1.8-4
## [63] gridtext_0.1.4 foreign_0.8-81
## [65] bit_4.0.4 rsvd_1.0.5
## [67] preprocessCore_1.54.0 Formula_1.2-4
## [69] stats4_4.1.0 DT_0.18
## [71] htmlwidgets_1.5.3 httr_1.4.2
## [73] RColorBrewer_1.1-2 ellipsis_0.3.2
## [75] pkgconfig_2.0.3 XML_3.99-0.6
## [77] scuttle_1.2.0 nnet_7.3-16
## [79] sass_0.4.0 uwot_0.1.10
## [81] locfit_1.5-9.4 utf8_1.2.1
## [83] dynamicTreeCut_1.63-1 tidyselect_1.1.1
## [85] rlang_0.4.11 reshape2_1.4.4
## [87] AnnotationDbi_1.54.1 munsell_0.5.0
## [89] tools_4.1.0 cachem_1.0.5
## [91] generics_0.1.0 RSQLite_2.2.7
## [93] evaluate_0.14 stringr_1.4.0
## [95] fastmap_1.1.0 yaml_2.2.1
## [97] org.Hs.eg.db_3.13.0 knitr_1.33
## [99] bit64_4.0.5 tidygraph_1.2.0
## [101] purrr_0.3.4 KEGGREST_1.32.0
## [103] sparseMatrixStats_1.4.0 pracma_2.3.3
## [105] xml2_1.3.2 compiler_4.1.0
## [107] rstudioapi_0.13 plotly_4.9.3
## [109] beeswarm_0.3.1 png_0.1-7
## [111] tibble_3.1.2 geneplotter_1.70.0
## [113] bslib_0.2.5.1 stringi_1.6.2
## [115] lattice_0.20-44 DGCA_1.0.2
## [117] Matrix_1.3-4 ggsci_2.9
## [119] vctrs_0.3.8 pillar_1.6.1
## [121] lifecycle_1.0.0 jquerylib_0.1.4
## [123] BiocNeighbors_1.10.0 data.table_1.14.0
## [125] bitops_1.0-7 irlba_2.3.3
## [127] GenomicRanges_1.44.0 R6_2.5.0
## [129] latticeExtra_0.6-29 gridExtra_2.3
## [131] vipor_0.4.5 IRanges_2.26.0
## [133] codetools_0.2-18 assertthat_0.2.1
## [135] SummarizedExperiment_1.22.0 DESeq2_1.32.0
## [137] withr_2.4.2 S4Vectors_0.30.0
## [139] GenomeInfoDbData_1.2.6 parallel_4.1.0
## [141] ggtext_0.1.1 rpart_4.1-15
## [143] grid_4.1.0 pathlibr_0.1.0
## [145] beachmat_2.8.0 tidyr_1.1.3
## [147] rmarkdown_2.8 DelayedMatrixStats_1.14.0
## [149] MatrixGenerics_1.4.0 Rtsne_0.15
## [151] Biobase_2.52.0 WGCNA_1.70-3
## [153] base64enc_0.1-3 ggbeeswarm_0.6.0
set.seed(43)
dual <- function(f, g) {
if (missing(g)) {
stopifnot(class(f) == "function")
(function(L) do.call(f, Filter(Negate(is.null), L)))
} else {
stopifnot(class(f) == "list")
stopifnot(class(g) == "function")
dual(g)(f)
}
}
Example 1:
list(
biggle = list(A = "hello", B = "world"),
wiggle = list(A = "Hello", B = "World", C = NULL)
) %>%
lapply(dual(function(B, A) paste(A, B)))
## $biggle
## [1] "hello world"
##
## $wiggle
## [1] "Hello World"
Example 2:
list(
A = data.frame(x = c("xa1", "xa2"), y = c("ya1", "ya2")),
B = data.frame(x = c("xb1", "xb2"), y = c("yb1", "yb2")),
C = data.frame(x = c("xc1", "xc2"), y = c("yc1", "yc2"))
) %>%
# No need for the brackets!
dual(cbind)
## A.x A.y B.x B.y C.x C.y
## 1 xa1 ya1 xb1 yb1 xc1 yc1
## 2 xa2 ya2 xb2 yb2 xc2 yc2
culprits <- function() {
search()[[1]] %>%
ls(name = .) %>%
sapply(. %>% get() %>% object.size() %>% "/"(1e6)) %>%
data.frame(size = .) %>%
arrange(size) %>%
mutate(cumsize = cumsum(size)) %>%
arrange(desc(size)) %>%
mutate(across(everything(), ~ signif(.x, digits = 2))) %>%
mutate(unit = "Mb")
}
Example:
culprits() %>% head(3)
## size cumsize unit
## dual 0.074 0.120 Mb
## glue 0.022 0.048 Mb
## culprits 0.011 0.026 Mb
for (i in 1:2) {
BASEPATH <- pathlibr::Path$new(Sys.glob(paste(c("work", ".."), "*FibroAct", sep = "/")))
setwd(BASEPATH$show)
}
print(paste("Work path:", BASEPATH))
## [1] "Work path: ../20210502-FibroAct"
stopifnot("main.Rmd" %in% names(BASEPATH$dir))
path_to <- (function(.) Sys.glob(BASEPATH$join("../..")$join(.)$show))
out_file <- (function(.) BASEPATH$join("output")$join(.)$show)
dir.create(out_file(""), showWarnings = FALSE)
Basic table printing.
kable <- function(.) {
kableExtra::kbl(., align = "c") %>%
kableExtra::kable_paper("hover", full_width = FALSE)
}
Options for fancy datatable printing.
options(DT.options = list(
pageLength = 16,
language = list(search = "Filter:"),
scrollX = TRUE
))
Converting a symbol or GO ID to a link.
as.link <-
function(x) {
dplyr::case_when(
# If x is like GO:003453
startsWith(x, "GO:") ~ paste0(
"<a target='_blank'",
"href='", "http://amigo.geneontology.org/amigo/term/", x, "'",
">", x, "</a>"
),
# Otherwise try this
TRUE ~ paste0(
"<a target='_blank'",
"href='", "http://www.informatics.jax.org/marker/summary?nomen=", x, "'",
">", x, "</a>"
)
)
}
Example
data.frame(x = c("GO:00345", "Lama")) %>%
mutate(link = as.link(x)) %>%
DT::datatable(width = "100%")
Printing a sequence of tables
kable_all <- function(tt) {
for (t in tt) {
t %>%
table() %>%
t() %>%
kable() %>%
print()
}
}
# with results='asis'
list(
a = c(c(1:5), c(2:5), c(3:5)),
b = c(c(1:6), c(2:6), c(3:6)),
b = c(c(1:7), c(2:7), c(3:7)),
b = c(c(1:9), c(2:9), c(3:9))
) %>% kable_all()
| 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 |
|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
ggplot2::theme_set(theme_light(base_size = 15))
gglast <- function(what) {
p <- ggplot2::last_plot()
q <- p$mapping[[deparse(substitute(what))]]
rlang::eval_tidy(q, data = p$data)
}
# example: scale_fill_manual(values = color_map, breaks = gglast(fill))
# Converts an annotation column (e.g. sce$group)
# to a named vector of colors (group -> color)
# List of palettes: grDevices::hcl.pals()
# http://colorspace.r-forge.r-project.org/reference/hcl_palettes.html
ramp <- function(anno, palette = "Temps") {
anno %>%
as.character() %>%
unique() %>%
c(NA) %>%
data.frame(
item = .,
# https://developer.r-project.org/Blog/public/2019/04/01/hcl-based-color-palettes-in-grdevices/
color = length(.) %>% colorRampPalette(grDevices::hcl.colors(n = ., palette = palette))(.)
) %>%
tidyr::drop_na() %>%
pull(color, item)
}
Example:
ramp(c("Grp1", "Grp2", "Grp1"), palette = "Dark 3") %>%
t() %>%
kable()
| Grp1 | Grp2 |
|---|---|
| #E16A86 | #50A315 |
RGB <- function(arr, alpha = 1) {
stopifnot(ncol(arr) == 3)
arr %>%
as.data.frame() %>%
`/`(255) %>%
mutate(alpha = alpha) %>%
mutate(colors = apply(., 1, \(x) rgb(x[1], x[2], x[3], alpha = x[["alpha"]]))) %>%
pull(colors)
}
Example:
(1:7) %>%
percent_rank() %>%
(colorRamp(c("blue", "red"))) %>%
RGB(alpha = 0.5) %>%
t() %>%
kable()
| #0000FF80 | #2B00D580 | #5500AA80 | #80008080 | #AA005580 | #D5002A80 | #FF000080 |
Colors for consistency; may be overwritten below.
colors <- unlist(
c(
list(
`374_VLMC` = "aquamarine3",
`375_VLMC` = "chartreuse4",
`376_VLMC` = "chartreuse3",
`FB1` = "cornflowerblue",
`FB2` = "blue",
`Ctrl` = "chartreuse4",
`EAE` = "coral2",
`VLMC1` = "chartreuse1",
`VLMC3` = "coral3",
`VLMC4` = "coral4",
`healthy` = "green",
`EAE1` = "coral1",
`EAE2` = "coral3",
`healthy1` = "aquamarine",
`healthy2` = "chartreuse3",
`healthy3` = "chartreuse4",
`positive` = "Red4",
`negative` = "Steelblue1"
),
ramp(c("G1.S", "S", "G2", "G2.M", "M.G1"), palette = "Dark 3")
)
)
colors %>%
unlist() %>%
data.frame(c = .) %>%
mutate(item = rownames(.)) %>%
ggplot(aes(y = item, x = 1)) +
geom_tile(fill = colors, stat = "identity") +
scale_y_discrete(limits = rev(names(colors))) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title = element_blank()
)
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/farver/libs/farver.so") ...

Zoomable figures.
///*
$(document).ready(
function() {
$("img.zoomable").on("click", function(evt) {
if (evt.originalEvent.shiftKey) {
var el = $(this).clone();
el.attr("width", "2048px");
window.open().document.write(el.wrap('<p>').parent().html());
return false;
}
});
}
);
//*/
Shift+click on a figure to zoom:
list(
randu %>% ggplot(aes(x = x, y = y)) +
geom_point() +
ggtitle("Zoomable 1"),
randu %>% ggplot(aes(x = x, y = z)) +
geom_point() +
ggtitle("Zoomable 2")
)


Click-scrollable figures (like slides):
$(document).ready(
function() {
$("img.scrollable").css({
'position': 'absolute', 'top': 0, 'left': 0, 'z-index': 100000,
});
$("img.scrollable").parent().css({'position': 'relative'});
$("img.scrollable").parent().find('img:first').css({'position': 'relative'});
$("img.scrollable").on("click", function(evt) {
if (!evt.originalEvent.altKey && !evt.originalEvent.shiftKey) {
$(this).css({'z-index': parseInt($(this).css('z-index'), 10) - 1});
evt.stopPropagation();
return false;
};
return true;
});
}
);
Click a figure to slide:
list(
randu %>% ggplot(aes(x = x, y = y)) +
geom_point() +
ggtitle("Slide 1"),
randu %>% ggplot(aes(x = x, y = z)) +
geom_point() +
ggtitle("Slide 2")
)


# Use first column as index
by_col1 <- (function(.) tibble::column_to_rownames(., colnames(.)[1]))
# Use index as new column `name`
ind2col <- (function(., name) tibble::rownames_to_column(., var = name))
take_rows <- function(df, rows = NULL) {
if (!is.null(rows)) {
return(df[rows[rows %in% rownames(df)], , drop = FALSE])
} else {
return(df)
}
}
# For reading wide tables
Sys.setenv("VROOM_CONNECTION_SIZE" = 2 ** 18)
from_tsv <- function(filename, sep = "\t") {
filename %>%
vroom::vroom(del = sep, progress = FALSE) %>%
suppressMessages() %>%
by_col1()
}
# Writes dataframe to tab-separated file
# Precede by `ind2col("name for index")` to write the row names
to_tsv <- function(df, fn, sep = "\t") {
vroom::vroom_write(df, out_file(fn), delim = sep)
}
# A small random sub-dataframe (intended for genes x samples)
shample <- function(df) {
df[
sample(rownames(df), min(nrow(.), 333)),
sample(colnames(df), min(ncol(.), 33))
]
}
# Constructs a `SingleCellExperiment` checking consistency of sample names
make_sce <- function(counts, coldata) {
# Check that samples in counts = genes x samples
# are ordered as in coldata = samples x metafields
stopifnot(assertthat::are_equal(colnames(counts), rownames(coldata)))
SingleCellExperiment::SingleCellExperiment(
assays = list(counts = counts), colData = coldata
)
}
mock_sce <- function(genes, ncells) {
sce <- scater::mockSCE(ngenes = length(genes), ncells = ncells)
sce <- make_sce(
sce@assays@data$counts %>% as.data.frame(row.names = genes),
sce@colData %>% as.data.frame() %>% rename(celltype = Mutation_Status)
)
sce
}
drop_empty <- function(sce) {
sce[, colSums(abs(sce@assays@data$counts)) != 0]
}
# Normalizes samplewise to sum(expr) = 1
# Consider using sce %>% scater::logNormCounts() %>% ...
norm1 <- function(sce) {
sce@assays@data$counts %<>%
t() %>%
{
(.) / (rowSums(.))
} %>%
t()
sce
}
# Example
mock_sce(genes = LETTERS[1:10], ncells = 3) %>%
take_rows(LETTERS[1:5]) %>%
drop_empty() %>%
norm1() %>%
scater::logNormCounts() %>%
scater::aggregateAcrossFeatures(ids = c("V", "C", "C", "C", "V"), average = TRUE)
## class: SingleCellExperiment
## dim: 2 3
## metadata(0):
## assays(1): counts
## rownames(2): C V
## rowData names(0):
## colnames(3): Cell_001 Cell_002 Cell_003
## colData names(4): celltype Cell_Cycle Treatment sizeFactor
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
taxid <- list(mm = 10090, hs = 9606)
| mm | hs |
|---|---|
| 10090 | 9606 |
symbol2geneid <- function(genes) {
data.frame(symbol = genes) %>%
merge(
y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
all.x = TRUE,
by = "symbol",
sort = FALSE # !
) %>%
pull(gene_id, symbol)
}
c("Jun", "Junb", "Junc") %>%
data.frame(symbol = .) %>%
mutate(gene_id = symbol2geneid(symbol)) %>%
kable()
| symbol | gene_id |
|---|---|
| Jun | 16476 |
| Junb | 16477 |
| Junc | NA |
omnipath.in <-
path_to("*/*/*/omnipath_in.csv.gz") %>%
from_tsv() %>%
select(
source, source_genesymbol, target, target_genesymbol,
organism,
is_inhibition, is_stimulation
)
Preview:
omnipath.tf <-
path_to("*/*/*/omnipath_tf.csv.gz") %>%
from_tsv() %>%
select(
source, source_genesymbol, target, target_genesymbol,
organism,
is_inhibition, is_stimulation
)
Preview:
omnipath.lr <-
path_to("*/*/*/omnipath_lr.csv.gz") %>%
from_tsv()
Preview:
We use Omnipath to annotate genes with their role using the following function.
# For a vector of gene symbols returns
# a vector containing the role of each symbol
symbol2role <- function(symbol, organism = taxid$mm) {
f <- function(df) {
df %>% filter(organism == organism)
}
data.frame(
TF = if_else(symbol %in% f(omnipath.tf)$source_genesymbol, "TF", ""),
Tgt = if_else(symbol %in% f(omnipath.tf)$target_genesymbol, "Tgt", ""),
Lig = if_else(symbol %in% f(omnipath.lr)$source_genesymbol, "Lig", ""),
Rec = if_else(symbol %in% f(omnipath.lr)$target_genesymbol, "Rec", "")
) %>%
apply(MARGIN = 1, FUN = function(row) {
if (any(nzchar(row))) {
paste(row[nzchar(row)], collapse = "/")
} else {
"--"
}
})
}
For example:
data.frame(symbol = c("Jun", "Notch1", "Xyz")) %>%
mutate(role = symbol2role(symbol)) %>%
kable()
| symbol | role |
|---|---|
| Jun | TF/Tgt |
| Notch1 | Tgt/Lig/Rec |
| Xyz | – |
The following provides a gene name (description) and a longer explanation (summary).
gene.summary <-
taxid %>%
lapply(
function(i) {
path_to(paste0("*/NCBI/gene/*/", names(which(. == i)), ".tsv.gz")) %>%
from_tsv() %>%
ind2col("gene_id")
}
)
Preview:
The following function sets up a gene id/symbol homology table.
# homology(taxid$mm, taxid$hs) %>% head
# gives a dataframe like
# gene_id.y gene_id.x symbol.x symbol.y
# 1 1 117586 A1bg A1BG
# 2 2 232345 A2m A2M
# 3 9 17961 Nat2 NAT1
# 4 10 17960 Nat1 NAT2
# 5 12 16625 Serpina3c SERPINA3
# 6 13 67758 Aadac AADAC
homology <- function(taxid.x, taxid.y) {
# Assume that `gene_id` is unique
symbols <- rbind(
org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
org.Hs.eg.db::org.Hs.egSYMBOL2EG %>% as.data.frame()
)
path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
from_tsv() %>%
select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
filter(taxid %in% c(taxid.x, taxid.y)) %>%
mutate(taxid = if_else(taxid == taxid.x, "x", "y")) %>%
stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
# "multiple rows match for taxid"
suppressWarnings() %>%
select(gene_id.x, gene_id.y) %>%
merge(y = symbols %>% select(gene_id.x = gene_id, symbol.x = symbol), by = "gene_id.x") %>%
merge(y = symbols %>% select(gene_id.y = gene_id, symbol.y = symbol), by = "gene_id.y")
}
For example:
homology(taxid$mm, taxid$hs) %>%
head(11) %>%
{
.[, order(colnames(.))]
} %>%
mutate(seems.resonable = (tolower(symbol.x) == tolower(symbol.y))) %>%
rbind("...") %>%
kable()
| gene_id.x | gene_id.y | symbol.x | symbol.y | seems.resonable |
|---|---|---|---|---|
| 117586 | 1 | A1bg | A1BG | TRUE |
| 232345 | 2 | A2m | A2M | TRUE |
| 17961 | 9 | Nat2 | NAT1 | FALSE |
| 17960 | 10 | Nat1 | NAT2 | FALSE |
| 16625 | 12 | Serpina3c | SERPINA3 | FALSE |
| 67758 | 13 | Aadac | AADAC | TRUE |
| 227290 | 14 | Aamp | AAMP | TRUE |
| 11298 | 15 | Aanat | AANAT | TRUE |
| 234734 | 16 | Aars | AARS1 | FALSE |
| 268860 | 18 | Abat | ABAT | TRUE |
| 11303 | 19 | Abca1 | ABCA1 | TRUE |
| … | … | … | … | … |
We use that to add description/summary to mouse genes by homology.
gene.summary$mm %<>%
merge(
x = .,
y = merge(
x = gene.summary$hs %>%
select(gene_id, summ = summary, desc = description) %>%
mutate(desc = if_else(is.na(desc), desc, paste("By homology:", desc))) %>%
mutate(summ = if_else(is.na(summ), summ, paste("By homology:", summ))),
y = path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
from_tsv() %>%
filter(NCBI_Taxon_ID %in% taxid) %>%
select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
select(gene_id = gene_id.9606, alt_gene_id = gene_id.10090),
by = "gene_id",
all = FALSE
) %>%
select(gene_id = alt_gene_id, alt_desc = desc, alt_summ = summ),
by = "gene_id",
all.x = TRUE,
all.y = FALSE,
sort = FALSE
) %>%
mutate(summary = if_else(!is.na(summary), summary, alt_summ)) %>%
mutate(description = if_else(!is.na(description), description, alt_desc)) %>%
select(-alt_desc, -alt_summ)
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=10090: first taken
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=9606: first taken
Preview:
goi <-
list(
neuroinflammation = list(
name = "Neuroinflammation",
genes = unique(c(
# "Early response" transcription factors: Fos, Fosb, Junb
c("Fos", "Fosb", "Junb"),
# Wikipathways Neuroinflammation:
# https://www.wikipathways.org/instance/WP4919_r111814
# Downloaded from
# http://www.gsea-msigdb.org/gsea/msigdb/cards/WP_NEUROINFLAMMATION.html
c("Ascc1", "Chuk", "Fos", "Jun", "Mapk14", "Mapk8", "mt-Co1", "mt-Co2", "Mtor", "Nfkbia", "Nos2", "Rela", "Tlr4")
))
),
# From 2021-Manberg-...-Lewandowski, p.643
pvf = list(
name = "PVF",
genes = c("Spp1", "Col6a1", "Col1a1", "Mmp2")
),
# Collagen
col = list(
name = "Collagen",
genes = c(
"Col10a1", "Col11a1", "Col11a2", "Col12a1", "Col13a1", "Col14a1",
"Col15a1", "Col16a1", "Col17a1", "Col18a1", "Col19a1", "Col1a1",
"Col1a2", "Col20a1", "Col22a1", "Col23a1", "Col24a1", "Col25a1",
"Col26a1", "Col27a1", "Col28a1", "Col2a1", "Col3a1", "Col4a1",
"Col4a2", "Col4a3", "Col4a3bp", "Col4a4", "Col4a5", "Col4a6",
"Col5a1", "Col5a2", "Col5a3", "Col6a1", "Col6a2", "Col6a3",
"Col6a4", "Col6a5", "Col6a6", "Col7a1", "Col8a1", "Col8a2",
"Col9a1", "Col9a2", "Col9a3", "Colec10", "Colec11", "Colec12",
"Colgalt1", "Colgalt2", "Colq"
)
# Obtained using
# sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^Col")] } %>% sort()
),
# Mitochondrial
mt = list(
name = "Mitochondrial",
genes = c(
"mt-Atp6", "mt-Atp8", "mt-Co1", "mt-Co2", "mt-Co3", "mt-Cytb",
"mt-Nd1", "mt-Nd2", "mt-Nd3", "mt-Nd4", "mt-Nd4l", "mt-Nd5", "mt-Nd6"
)
),
# Obtained using (e.g.)
# sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^mt-")] } %>% sort()
air = list(
name = "Aerobic respiration",
genes = c(
# GO:1903715 (regulation of aerobic respiration)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:1903715
c("Actn3", "Akt1", "Bnip3", "Cbfa2t3", "Hif1a", "Ide", "Nop53", "Shmt2", "Trpv4", "Vcp"),
# GO:0009060 (aerobic respiration) minus GO:1903715 and minus 'mt-Co3'
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0009060
c("Aco1", "Aco2", "Adsl", "Afg1l", "Atp5f1d", "Bloc1s1", "Cat", "Cox10", "Cox4i1", "Cox4i2", "Cox5a", "Cox5b", "Cox6a1", "Cox6a2", "Cox7c", "Cs", "Csl", "Cycs", "Cyct", "Dhtkd1", "Dlat", "Dlst", "Fh", "Fxn", "Idh1", "Idh2", "Idh3a", "Idh3b", "Idh3g", "Ireb2", "Mdh1", "Mdh1b", "Mdh2", "Mtco1", "Mtfr1", "Mtfr1l", "Mtfr2", "Mtnd1", "Mtnd4", "Mup1", "Mup11", "Mup2", "Mup3", "Mup4", "Mup5", "Mup6", "Ndufs4", "Ndufs7", "Ndufs8", "Ogdh", "Ogdhl", "Oxa1l", "Pank2", "Pdha1", "Pdha2", "Pdhb", "Proca1", "Q8BPC6", "Sdha", "Sdhaf2", "Sdhb", "Sdhc", "Sdhd", "Sirt3", "Sucla2", "Suclg1", "Suclg2", "Ucn", "Uqcr10", "Uqcrb", "Uqcrh")
)
),
fib_mig = list(
name = "Reg. fib. mig.",
genes = c(
# GO:0010762 (regulation of fibroblast migration)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0010762
c("Acta2", "Actr3", "Adipor2", "Ager", "Akap12", "Akt1", "Appl1", "Appl2", "Arhgap4", "Bag4", "Braf", "Cln3", "Coro1c", "Cygb", "Ddr2", "Dmtn", "Fer", "Fgf2", "Fgfr1", "Gna12", "Has1", "Hyal2", "Itgb1", "Itgb1bp1", "Itgb3", "MACIR", "Mmp1a", "Mta2", "Pak1", "Pak3", "Prkce", "Prr5l", "Ptk2", "Rac1", "Rcc2", "Rffl", "Sdc4", "Slc8a1", "Tgfb1", "Thbs1", "Tsc2", "Uts2", "Wdpcp")
)
),
glycolysis = list(
name = "Anaerobic glycolysis",
genes = c(
# GO:0006096 (glycolytic process / anaerobic glycolysis)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0006096
c("Actn3", "Adpgk", "Aldoa", "Aldoart1", "Aldoart2", "Aldob", "Aldoc", "App", "Bpgm", "Cbfa2t3", "Ddit4", "Dhtkd1", "Eif6", "Eno1", "Eno2", "Eno3", "Eno4", "Entpd5", "Ep300", "Esrrb", "Fbp1", "Foxk1", "Foxk2", "Gale", "Galk1", "Galt", "Gapdh", "Gapdhs", "Gck", "Git1", "Gm10358", "Gm11214", "Gm12117", "Gm15294", "Gm3839", "Gpd1", "Gpi", "Hdac4", "Hif1a", "Hk1", "Hk2", "Hk3", "Hkdc1", "Htr2a", "Ier3", "Ifng", "Igf1", "Il3", "Ins2", "Insr", "Jmjd8", "Khk", "Mif", "Mlxipl", "Mpi", "Mtch2", "Myc", "Myog", "Ncor1", "Nupr1", "Ogdh", "Ogt", "P2rx7", "Pfkfb2", "Pfkl", "Pfkm", "Pfkp", "Pgam1", "Pgam2", "Pgk1", "Pgk2", "Pklr", "Pkm", "Ppara", "Ppargc1a", "Prkaa1", "Prkaa2", "Prkag2", "Prkag3", "Prxl2c", "Psen1", "Sirt6", "Slc2a6", "Slc4a1", "Slc4a4", "Stat3", "Tigar", "Tkfc", "Tpi1", "Trex1", "Zbtb20", "Zbtb7a")
)
)
)
# Use a function because the list may change
get_goi_genes <- function() {
goi %>%
lapply(as.data.frame) %>%
dual(base::rbind) %>%
pull(genes) %>%
unique()
}
get_goi_genes()[1:7] %>%
c("...") %>%
t() %>%
kable()
| Fos | Fosb | Junb | Ascc1 | Chuk | Jun | Mapk14 | … |
# Return a vector `goi_set` indicating the gene set for each `symbol`
symbol2goiset <- function(genes) {
df <- data.frame(symbol = genes, goi_set = "Other")
for (goi in goi) {
df %<>% mutate(goi_set = if_else(symbol %in% goi$genes, goi$name, goi_set))
}
df %>% pull(goi_set, symbol)
}
show_gene_info_table <- function(df) {
df$symbols # need this column
df %>%
mutate(role = symbol2role(symbol)) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
mutate(gene_id = symbol2geneid(symbol)) %>%
merge(gene.summary$mm, all.x = TRUE, by = "gene_id", sort = FALSE) %>%
mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
mutate(description = paste0("<span style='font-size:70%'>", description, "</span>")) %>%
select(-summary) %>%
mutate(symbol = as.link(symbol)) %>%
DT::datatable(rownames = NULL, escape = FALSE, width = "100%")
}
get_goi_genes() %>%
sample(size = 3) %>%
data.frame(symbol = .) %>%
rbind("...") %>%
show_gene_info_table()
# TODO: normalize with edgeR?
# Plots the PCA, showing the expression of genes
show_goi_pca <- function(sce, genes, group.by = "celltype", colors = ramp(as.character(sce@colData[[group.by]]))) {
cbind(
# Reduced dim coordinates
sce@int_colData$reducedDims$PCA %>%
as.data.frame(row.names = colnames(sce)) %>%
select(x = PC1, y = PC2),
# Metadata
sce@colData %>%
as.data.frame(row.names = colnames(sce)) %>%
select(celltype = all_of(group.by)),
# Expression data for genes-of-interest
sce@assays@data$counts %>%
log1p() %>%
`[`(genes[genes %in% rownames(.)], ) %>%
t()
) %>%
# Make long table
reshape2::melt(
# Keep as separate columns:
id.vars = c("x", "y", "celltype"),
# These columns will be stacked, in a column with this name:
measure.vars = genes, variable.name = "symbol"
) %>%
# Plot
ggplot(aes(x = x, y = y, color = celltype)) +
geom_point(aes(size = value), alpha = 0.4) +
scale_color_manual(values = colors, breaks = gglast(colour)) +
# theme_void() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
) +
labs(color = group.by, size = "Expression") +
facet_wrap(vars(symbol))
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
scater::logNormCounts() %>%
scater::runPCA() %>%
show_goi_pca(goi$pvf$genes, group.by = "Cell_Cycle", colors = list(`G0` = "Red", `G1` = "Blue", `G2M` = "Orange", `S` = "DarkGreen", `Dead` = "Black"))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

# Aggregates genes by `group.by` for each geneset from `goi`
goi_agg_by_group <- function(sce, group.by = "celltype") {
goi %>%
lapply(function(goi) {
(dual(rbind))(as.list(group.by) %>% lapply(function(group.by) {
if (any(goi$genes %in% rownames(sce))) {
cbind(
# Metadata
sce@colData %>%
as.data.frame(row.names = colnames(sce)) %>%
select(subgroup = all_of(group.by)),
# Expression data for genes-of-interest
sce %>%
scater::logNormCounts() %>%
SingleCellExperiment::logcounts() %>%
take_rows(goi$genes) %>%
t()
) %>%
# Aggregate by sample
group_by(subgroup) %>%
summarise(across(everything(), mean)) %>%
# Make long table
reshape2::melt(
# Keep as separate columns:
id.vars = c("subgroup"),
# Other columns will be stacked, in a column with this name:
variable.name = "symbol"
# A column `value` contains their values
) %>%
mutate(group = group.by) %>%
mutate(goi_set = goi$name)
}
}))
}) %>%
# Relabel the elements of the list
dual(rbind) %>%
split(.$goi_set)
# cf.
# https://stackoverflow.com/questions/33775239/emulate-split-with-dplyr-group-by-return-a-list-of-data-frames
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
goi_agg_by_group(group.by = "Treatment") %>%
lapply(head)
## $`Aerobic respiration`
## subgroup symbol value group goi_set
## air.1 treat1 Actn3 1.904347 Treatment Aerobic respiration
## air.2 treat2 Actn3 2.312375 Treatment Aerobic respiration
## air.3 treat1 Akt1 7.215821 Treatment Aerobic respiration
## air.4 treat2 Akt1 7.179238 Treatment Aerobic respiration
## air.5 treat1 Bnip3 5.517770 Treatment Aerobic respiration
## air.6 treat2 Bnip3 5.843600 Treatment Aerobic respiration
##
## $`Anaerobic glycolysis`
## subgroup symbol value group goi_set
## glycolysis.1 treat1 Actn3 1.904347 Treatment Anaerobic glycolysis
## glycolysis.2 treat2 Actn3 2.312375 Treatment Anaerobic glycolysis
## glycolysis.3 treat1 Adpgk 2.892426 Treatment Anaerobic glycolysis
## glycolysis.4 treat2 Adpgk 2.847102 Treatment Anaerobic glycolysis
## glycolysis.5 treat1 Aldoa 3.626778 Treatment Anaerobic glycolysis
## glycolysis.6 treat2 Aldoa 4.233222 Treatment Anaerobic glycolysis
##
## $Collagen
## subgroup symbol value group goi_set
## col.1 treat1 Col10a1 6.798632 Treatment Collagen
## col.2 treat2 Col10a1 4.794314 Treatment Collagen
## col.3 treat1 Col11a1 4.770213 Treatment Collagen
## col.4 treat2 Col11a1 5.417654 Treatment Collagen
## col.5 treat1 Col11a2 9.131331 Treatment Collagen
## col.6 treat2 Col11a2 9.142413 Treatment Collagen
##
## $Mitochondrial
## subgroup symbol value group goi_set
## mt.1 treat1 mt-Atp6 9.198091 Treatment Mitochondrial
## mt.2 treat2 mt-Atp6 8.463647 Treatment Mitochondrial
## mt.3 treat1 mt-Atp8 8.300985 Treatment Mitochondrial
## mt.4 treat2 mt-Atp8 8.090240 Treatment Mitochondrial
## mt.5 treat1 mt-Co1 4.740320 Treatment Mitochondrial
## mt.6 treat2 mt-Co1 4.702137 Treatment Mitochondrial
##
## $Neuroinflammation
## subgroup symbol value group goi_set
## neuroinflammation.1 treat1 Fos 9.214055 Treatment Neuroinflammation
## neuroinflammation.2 treat2 Fos 8.945624 Treatment Neuroinflammation
## neuroinflammation.3 treat1 Fosb 9.204561 Treatment Neuroinflammation
## neuroinflammation.4 treat2 Fosb 9.065280 Treatment Neuroinflammation
## neuroinflammation.5 treat1 Junb 3.195598 Treatment Neuroinflammation
## neuroinflammation.6 treat2 Junb 2.399009 Treatment Neuroinflammation
##
## $PVF
## subgroup symbol value group goi_set
## pvf.1 treat1 Spp1 6.472082 Treatment PVF
## pvf.2 treat2 Spp1 7.130099 Treatment PVF
## pvf.3 treat1 Col6a1 7.754018 Treatment PVF
## pvf.4 treat2 Col6a1 7.328395 Treatment PVF
## pvf.5 treat1 Col1a1 5.324151 Treatment PVF
## pvf.6 treat2 Col1a1 5.688841 Treatment PVF
##
## $`Reg. fib. mig.`
## subgroup symbol value group goi_set
## fib_mig.1 treat1 Acta2 9.492388 Treatment Reg. fib. mig.
## fib_mig.2 treat2 Acta2 9.392135 Treatment Reg. fib. mig.
## fib_mig.3 treat1 Actr3 2.322423 Treatment Reg. fib. mig.
## fib_mig.4 treat2 Actr3 2.649601 Treatment Reg. fib. mig.
## fib_mig.5 treat1 Adipor2 1.427232 Treatment Reg. fib. mig.
## fib_mig.6 treat2 Adipor2 1.139268 Treatment Reg. fib. mig.
# For each cell, aggregate each gene set
# Returns a sample x geneset table (with additional metadata columns)
goi_agg_by_genes <- function(sce) {
cbind(
# Sample metadata
sce@colData %>%
as.data.frame() %>%
select(any_of(c("cluster", "celltype", "group", "subgroup", "Group"))),
# Mean expression for genes-of-interest
lapply(goi, function(goi) {
list() %>% within({
assign(
goi$name,
sce@assays@data$counts %>%
log1p() %>%
take_rows(goi$genes) %>%
colMeans()
)
})
}),
# Library size
data.frame(lib.size = sce@assays@data$counts %>% colSums()),
# Reduced dim coordinates
sce %>%
scater::logNormCounts() %>%
scater::runPCA() %>%
SingleCellExperiment::reducedDim("PCA") %>%
as.data.frame() %>%
select(x = PC1, y = PC2, z = PC3)
)
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
goi_agg_by_genes() %>%
head() %>%
DT::datatable(width = "100%", options = list(scrollX = TRUE))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.
This is a set of differential genes in kidney fibroblast activation from Deng et al., 2021.
fb_genes_si7 <- path_to("*/Deng-Wei-2021/*/SI7.*.gz") %>% from_tsv()
# fb_genes_si8 <- path_to("*/Deng-Wei-2021/*/SI8.*.gz") %>% from_tsv()
show_tbl <- function(tbl) {
tbl %<>%
ind2col("symbol") %>%
mutate(symbol = as.link(symbol)) %>%
select(symbol, logFC)
tbl %>%
DT::datatable(escape = FALSE) %>%
DT::formatSignif(colnames(tbl %>% select(-symbol)), digits = 3)
}
The list of genes is from Dorrier-Aran-…-Daneman, 2021, Extended Data Fig. 4.
if (!("c0" %in% names(goi))) {
goi %<>%
c(
list(
`0` = c("Col1a2", "Bgn", "Col3a1", "Col8a1", "Igf1", "Ifitm1", "Col1a1", "Cfh", "Sparc", "B2m"),
`1` = c("Apod", "Trf", "Dcn", "Gsn", "Gpc3", "Tgfbi", "Ccl11", "Smoc2", "Lum", "Cst3"),
`2` = c("Lgals1", "Timp1", "Rpl41", "Rps18", "Rps12", "Col4a1", "Cxcl10"),
`3` = c("Junb", "Fosb", "Fos", "Ptn", "Igfbp2", "Klf4", "Clec3b", "Apoe", "Rbp4", "Vtn"),
`4` = c("Rbp1", "Fth1", "Aldh1a2", "Fn1", "Igfbp3", "Slc7a11", "Ptgds", "Timp3", "Igfbp5"),
`5` = c("Ptch1", "Igfbp6", "Tmsb4x", "S100a6", "Igfbp7", "Mgp", "Saa1"),
`6` = c("Itih5", "Spp1", "Klf2", "Ubb", "Jund", "Mt1"),
`7` = c("Rgs5", "Acta2", "Myl9", "Gm13889", "Tagln", "Tpm2", "Actb")
) %>%
stack() %>%
mutate(cluster = paste0("c", ind), name = paste0("Dm #", ind)) %>%
split(.$cluster) %>%
lapply(function(c) list(name = unique(c$name), genes = c$values))
)
}
heatmap <- function(sce, colors = NULL, anno_col = NULL, anno_row = NULL, ...) {
# `colors` should be like list(`negative` = "Blue", `positive` = "Red", ...)
colors %<>% c(NULL) %>% unlist()
if (!is.null(anno_row)) {
stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
}
if (!is.null(anno_col)) {
stopifnot(assertthat::are_equal(colnames(sce), rownames(anno_col)))
}
anno_col %<>%
list(
sce@colData %>%
as.data.frame() %>%
select(any_of(c("celltype", "subgroup", "Group", "cluster", "Treatment")))
) %>%
dual(cbind)
# print(anno_col %>% as.list() %>% lapply(\(x) if (all(x %in% names(colors))) colors[unique(as.character(x))] else ramp(x)))
sce %>%
SingleCellExperiment::counts() %>%
log1p() %>%
{
# (.) - rowMeans(.)
# (.) / rowSums(.)
} %>%
pheatmap::pheatmap(
show_colnames = FALSE,
treeheight_row = 0,
treeheight_col = 0,
cluster_rows = FALSE,
fontsize = 6,
color = grDevices::colorRampPalette(c("black", "yellow"))(10),
annotation_col = anno_col,
annotation_row = anno_row,
annotation_colors = c(
anno_row %>% as.list() %>% lapply(ramp),
anno_col %>% as.list() %>% lapply(\(x) if (all(x %in% names(colors))) colors[unique(as.character(x))] %>% unlist() else ramp(x))
),
...
)
}
show_dm_heatmap <- function(sce, anno_row = NULL, ...) {
dm_genes <-
goi[grep("c[0-9]", goi %>% names())] %>%
lapply(as.data.frame) %>%
dual(rbind) %>%
pull(name, genes)
if (!is.null(anno_row)) {
stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
}
anno_row %<>%
take_rows(names(dm_genes))
sce %<>%
take_rows(names(dm_genes))
anno_row %<>%
list(data.frame(dm.cluster = dm_genes[rownames(sce)])) %>%
dual(cbind)
heatmap(sce, anno_row = anno_row, ...)
}
unlist(goi[grep("c[0-9]", goi %>% names())]) %>%
mock_sce(ncells = 77) %>%
{
(.)[, order((.)$celltype)]
} %>%
show_dm_heatmap(
colors = colors,
cluster_cols = FALSE,
anno_col = data.frame(x = ((1:ncol(.)) %% 2), row.names = colnames(.)),
anno_row = data.frame(y = ((1:nrow(.)) %% 2), row.names = rownames(.))
)

The following table shows all the “genes of interest”.
get_goi_genes() %>%
data.frame(symbol = .) %>%
show_gene_info_table()
Small dataset to run through quickly.
sce_dummy <- readRDS(path_to("*/*/sce_dummy.rds"))
# Made with:
# load_dataset$dm(size = 77) %>% keep_only_goi() %>% saveRDS(file = "sce_dummy.rds")
## class: SingleCellExperiment
## dim: 267 74
## metadata(0):
## assays(1): counts
## rownames(267): Fos Fosb ... Zbtb20 Zbtb7a
## rowData names(0):
## colnames(74): healthy2_AATCCAGCATAGTAAG EAE1_AAAGATGTCGGAAATA ...
## EAE1_CTGTTTAAGAGTCTGG EAE1_TTAGTTCTCTCTGAGA
## colData names(4): group subgroup cluster celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
data.frame(
x = sce_dummy@assays@data$counts %>% colSums(),
subgroup = sce_dummy$subgroup
) %>%
ggplot(aes(x = x, color = subgroup)) +
scale_color_manual(values = colors[gglast(colour)]) +
geom_density()

scater_attach <- function(sce) {
scater::addPerCellQC(
x = sce,
subsets = list(Mito = grep("mt-", rownames(sce)))
) %>%
scater::logNormCounts() %>%
scater::runPCA(name = "PCA", ncomponents = 20) %>%
scater::runTSNE(perplexity = 10, dimred = "PCA") %>%
scater::runUMAP()
}
sce_dummy %<>% scater_attach()
## Read the 74 x 20 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 2, perplexity = 10.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.595690)!
## Learning embedding...
## Iteration 50: error is 63.296339 (50 iterations in 0.01 seconds)
## Iteration 100: error is 60.037220 (50 iterations in 0.01 seconds)
## Iteration 150: error is 64.801343 (50 iterations in 0.01 seconds)
## Iteration 200: error is 62.466966 (50 iterations in 0.01 seconds)
## Iteration 250: error is 64.566597 (50 iterations in 0.01 seconds)
## Iteration 300: error is 1.927342 (50 iterations in 0.00 seconds)
## Iteration 350: error is 1.369595 (50 iterations in 0.01 seconds)
## Iteration 400: error is 1.140789 (50 iterations in 0.00 seconds)
## Iteration 450: error is 0.978576 (50 iterations in 0.00 seconds)
## Iteration 500: error is 0.799270 (50 iterations in 0.00 seconds)
## Iteration 550: error is 0.704285 (50 iterations in 0.00 seconds)
## Iteration 600: error is 0.672386 (50 iterations in 0.00 seconds)
## Iteration 650: error is 0.660327 (50 iterations in 0.00 seconds)
## Iteration 700: error is 0.643767 (50 iterations in 0.00 seconds)
## Iteration 750: error is 0.638919 (50 iterations in 0.00 seconds)
## Iteration 800: error is 0.638740 (50 iterations in 0.00 seconds)
## Iteration 850: error is 0.640056 (50 iterations in 0.01 seconds)
## Iteration 900: error is 0.641027 (50 iterations in 0.01 seconds)
## Iteration 950: error is 0.641538 (50 iterations in 0.00 seconds)
## Iteration 1000: error is 0.641952 (50 iterations in 0.00 seconds)
## Fitting performed in 0.12 seconds.
## 07:36:29 UMAP embedding parameters a = 1.896 b = 0.8006
## 07:36:29 Read 74 rows and found 267 numeric columns
## 07:36:29 Reducing X column dimension to 50 via PCA
## 07:36:29 PCA: 50 components explained 95.41% variance
## 07:36:29 Using FNN for neighbor search, n_neighbors = 15
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/FNN/libs/FNN.so") ...
## 07:36:30 Commencing smooth kNN distance calibration using 4 threads
## 07:36:31 Initializing from normalized Laplacian + noise
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSpectra/libs/RSpectra.so") ...
## 07:36:31 Commencing optimization for 500 epochs, with 1758 positive edges
## 07:36:31 Optimization finished
sce_dummy %>%
scater::makePerCellDF() %>%
head(3) %>%
DT::datatable(width = "100%")
scater_show <- function(sce, color.by = "celltype", colors = NULL) {
items <- sce@colData[[color.by]]
colors <-
if (all(items %in% names(colors))) colors[items] else ramp(items, palette = "Set 2")
sce %>%
{
function(which_dimred) {
suppressMessages(
scater::plotReducedDim(
dimred = which_dimred,
object = (.),
colour_by = color.by
) +
scale_color_manual(values = colors) + # no need for `breaks`
labs(color = color.by) +
guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))) +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
)
}
} %>%
{
gridExtra::arrangeGrob((.)("PCA"), (.)("TSNE"), (.)("UMAP"), nrow = 3)
}
}
sce_dummy %>%
scater_show(color.by = "subgroup", colors = colors) %>%
gridExtra::grid.arrange()

Template, not for direct use.
pca3d_template <- function(sce, color.by = "celltype", colors = ramp(sce@colData[[color.by]])) {
data.frame(
color_group = sce@colData[[color.by]],
lib.size = sce@assays@data$counts %>% colSums(),
SingleCellExperiment::reducedDim(sce, "PCA") %>%
as.data.frame() %>%
select(PC1, PC2, PC3)
) %>%
group_by(color_group) %>%
sample(size = 30, replace = TRUE) %>%
ungroup() %>%
# print() %>%
plotly::plot_ly() %>%
plotly::add_markers(
x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(size = 2, opacity = 1),
color = ~color_group,
colors = colors
) %>%
plotly::layout(
scene = list(
xaxis = list(title = "PC1", showticklabels = FALSE),
yaxis = list(title = "PC2", showticklabels = FALSE),
zaxis = list(title = "PC3", showticklabels = FALSE)
)
)
}
sce_dummy %>% pca3d_template(color.by = "subgroup", colors = colors)
default_cyclic_genes <-
# Convert the "cyclic genes" of `Revelio` from Hs to Mm
Revelio::revelioTestData_cyclicGenes %>%
lapply(function(genes) {
merge(
x = data.frame(symbol.x = genes),
y = homology(taxid$hs, taxid$mm),
by = "symbol.x",
all.x = TRUE
) %>%
pull(symbol.y) %>%
sort(na.last = TRUE)
})
default_cyclic_genes %>%
as.data.frame() %>%
head() %>%
rbind("...") %>%
kable()
| G1.S | S | G2 | G2.M | M.G1 |
|---|---|---|---|---|
| Abca7 | a | 1700001L19Rik | Adh4 | 1700102P08Rik |
| Acd | Abcc2 | 1700066M21Rik | Ahi1 | 2700049A03Rik |
| Acyp1 | Abcc5 | Alkbh1 | Akirin2 | Afap1 |
| Adamts1 | Abhd10 | Anln | Ankrd40 | Agfg1 |
| Adck2 | Adam22 | Ap3d1 | Anln | Agpat3 |
| Adcy6 | Arhgap42 | Arhgap19 | Arhgap19 | Akap13 |
| … | … | … | … | … |
The DC1-DC2 plane is supposed to show a circle reflecting the cell cycle (cf. Fig 1 in Schwabe et al., 2020).
cell_cycle <- function(sce, batch.by = NA, cyclic_genes = default_cyclic_genes) {
list(
counts = sce@assays@data$counts,
orig.id = colnames(sce),
batch = if (is.na(batch.by)) "cell" else paste0(sce@colData[[batch.by]], "_")
) %>%
within({
colnames(counts) <- paste0(batch, 1:ncol(counts))
orig.id <- data.frame(old = orig.id, new = colnames(counts)) %>% pull(old, new)
}) %>%
with({
counts %>%
Revelio::createRevelioObject(cyclicGenes = cyclic_genes, lowernGeneCutoff = 0) %>%
Revelio::getCellCyclePhaseAssignInformation() %>%
Revelio::getPCAData() %>%
Revelio::getOptimalRotation() %>%
list(cyclic = .) %>%
with({
assertthat::assert_that(all(
rownames(cyclic@cellInfo) == colnames(cyclic@transformedData$pca$data)
))
cbind(
cyclic@transformedData$pca$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
cyclic@transformedData$dc$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
cyclic@cellInfo
)
}) %>%
data.frame(row.names = orig.id[rownames(.)])
}) %>%
cbind(as.data.frame(sce@colData)[rownames(.), , drop = FALSE])
}
cell_cycle_plot2d <- function(cco, colors = ramp(cco$ccPhase)) {
cco %>%
ggplot(aes(x = DC1, y = DC2, color = ccPhase)) +
geom_point(size = 3, stroke = 1) +
scale_color_manual(values = colors, breaks = gglast(colour))
}
cco_revelio <-
cell_cycle(
Revelio::revelioTestData_rawDataMatrix %>%
make_sce(
counts = .,
coldata = data.frame(
x = colnames(.) %>% strsplit("_") %>% dual(rbind),
row.names = colnames(.)
) %>% select(batch = x.1)
),
batch.by = "batch",
cyclic_genes = Revelio::revelioTestData_cyclicGenes
)
## 2021-06-28 21:27:59: calculating optimal rotation: 2021-06-28 21:27:59: calculating PCA: 2021-06-28 21:27:59: assigning cell cycle phases: 2021-06-28 21:27:59: reading data: 4.8secs
## 27.48secs
## 50.82secs
## 1.23mins
cco_revelio %>%
ggplot(aes(x = DC1, y = DC2, color = ccPhase, shape = batch)) +
geom_point(size = 3, stroke = 1) +
scale_color_manual(values = colors, breaks = gglast(colour))

cco_mock <-
unlist(as.list(default_cyclic_genes)) %>%
{
(.)[!is.na(.)]
} %>%
mock_sce(ncells = 33) %>%
cell_cycle()
## 2021-06-28 21:29:14: calculating optimal rotation: 2021-06-28 21:29:14: calculating PCA: 2021-06-28 21:29:14: assigning cell cycle phases: 2021-06-28 21:29:14: reading data: 0.01secs
## 0.36secs
## 0.41secs
## 43.04secs
cco_mock %>%
ggplot(aes(x = DC1, y = DC2, color = celltype, fill = ccPhase)) +
geom_point(shape = 21, size = 5) +
ggplot2::scale_color_manual(values = colors, breaks = gglast(colour)) +
ggplot2::scale_fill_manual(values = colors, breaks = gglast(fill))

cell_cycle_plot3d <- function(cco, colors = ramp(cco$ccPhase)) {
cco %>%
with({
plotly::plot_ly() %>%
plotly::add_markers(
x = DC1, y = DC2, z = DC3,
size = 4,
color = ccPhase,
colors = colors
)
})
}
cco_mock %>% cell_cycle_plot3d()
cco_mock %>%
with({
rgl::plot3d(DC1, DC2, DC3, size = 3, type = "s", col = ramp(ccPhase)[ccPhase])
rgl::rglwidget()
})
deseq <- function(sce, design) {
sce %>%
DESeq2::DESeqDataSet(design = design) %>%
DESeq2::estimateSizeFactors(type = "poscounts") %>%
DESeq2::DESeq(quiet = TRUE, fitType = "local") %>%
DESeq2::results() %>%
as.data.frame() %>%
select(base = baseMean, lfc = log2FoldChange, p = pvalue) %>%
mutate(base = log1p(base)) %>%
tidyr::drop_na() %>%
mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
ind2col("symbol") %>%
list(table = ., design = design)
}
edger <- function(sce, design) {
sce %>%
edgeR::estimateDisp(robust = TRUE) %>%
# edgeR::calcNormFactors(method = "TMM") %>%
# edgeR::estimateGLMTagwiseDisp(design = design) %>%
edgeR::glmFit(design = model.matrix(design, data = sce@colData)) %>%
edgeR::glmLRT(coef = 2) %>%
`$`(table) %>%
select(base = logCPM, lfc = logFC, p = PValue, LR = LR) %>%
tidyr::drop_na() %>%
mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
ind2col("symbol") %>%
list(table = ., design = design)
}
# Hacky fctn to determine colors for `lfc > 0` and `lfc < 0`
lfc_colors <- function(sce, design, colors) {
f <- terms(design) %>% attr("factors")
stopifnot((nrow(f) == 1) && (ncol(f) == 1))
f <- sce@colData[[f %>% colnames()]]
cc <- colors[f %>% levels()]
cc <- c(cc[[1]], (rowMeans(col2rgb(cc[-1])) / 255) %>% as.list() %>% dual(rgb))
(. %>% (colorRamp(cc)) %>% RGB())
}
compute_de <- function(sce, design) {
list(
deseq = suppressMessages(deseq(sce, design)),
edger = suppressMessages(edger(sce, design))
) %>%
with({
list(
table = rbind(
deseq$table %>%
select(symbol, base, lfc, p) %>%
mutate(src = "DESeq2"),
edger$table %>%
select(symbol, base, lfc, p) %>%
mutate(src = "edgeR")
),
design = design
)
}) %>%
within({
# Attach LFC color (not very important)
table %<>%
group_by(src) %>%
mutate(color = (lfc_colors(sce, design, colors))(percent_rank(lfc))) %>%
ungroup()
})
}
sce_dummy.de <- sce_dummy %>% compute_de(~group)
sce_dummy.de$table %>%
head() %>%
rbind("...") %>%
kable()
| symbol | base | lfc | p | src | color |
|---|---|---|---|---|---|
| Fos | 1.94433786752507 | 1.03952791369199 | 0.0350644319817966 | DESeq2 | #C98144FF |
| Fosb | 1.32188433643544 | 0.392699195334891 | 0.94357785439112 | DESeq2 | #A09B36FF |
| Junb | 1.71655787188708 | 0.618371363877933 | 0.403770457436529 | DESeq2 | #B0913BFF |
| Spp1 | 0.685515786871741 | 1.75726212594778 | 0.121724809299487 | DESeq2 | #E4704DFF |
| Col6a1 | 0.644138066638724 | 0.767376344078733 | 0.324044869548652 | DESeq2 | #B68D3DFF |
| Col1a1 | 2.86899094589606 | 1.87748518409641 | 1.75462356248164e-09 | DESeq2 | #E86E4EFF |
| … | … | … | … | … | … |
show_de_table <- function(.) {
(.) %>%
group_by(src) %>%
slice_min(p, n = 777) %>%
slice_max(abs(lfc), n = 777) %>%
ungroup() %>%
merge(
y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
all.x = TRUE,
by = "symbol"
) %>%
merge(
y = gene.summary$mm %>% select(gene_id, description, summary),
all.x = TRUE,
by = "gene_id"
) %>%
select(-any_of("gene_id")) %>%
select(-any_of("color")) %>%
# Role: TF, Rec, Lig, etc
mutate(role = symbol2role(symbol)) %>%
mutate(symbol = as.link(symbol)) %>%
mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
select(-summary) %>%
mutate(description = paste0("<span style='font-size: 70%;'>", description, "</span>")) %>%
DT::datatable(escape = FALSE) %>%
DT::formatSignif(c("base", "lfc", "p"), digits = 3)
}
sce_dummy.de$table %>%
group_by(src) %>%
slice_min(p, n = 33) %>%
ungroup() %>%
show_de_table()
show_de_heatmap <- function(sce, de_table, ...) {
top <- de_table %>%
group_by(lfc > 0) %>%
slice_min(p, n = 33, with_ties = FALSE) %>%
ungroup() %>%
arrange(lfc) %>%
select(symbol, lfc, p) %>%
mutate(`-log10(p)` = -log10(p)) %>%
by_col1() %>%
`[`(rownames(.) %in% rownames(sce), )
sce[rownames(top), ] %>%
heatmap(anno_row = top, ...)
}
sce_dummy %>%
show_de_heatmap(sce_dummy.de$table %>% filter(src == "edgeR"), colors = colors)

show_de_volcano <- function(de_table) {
de_table %<>%
mutate(goi_set = symbol2goiset(symbol)) %>%
group_by(src) %>%
arrange(lfc) %>%
mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
ungroup() %>%
mutate(label = ifelse(is_out, symbol, ""))
ggplot(de_table, aes(x = lfc, y = -log10(p))) +
# All genes
geom_point(color = "gray") +
# Color by geneset
geom_point(
data = de_table %>% filter(goi_set != "Other"),
aes(x = lfc, y = -log10(p), color = goi_set)
) +
scale_color_manual(values = ramp(de_table$goi_set)) +
labs(color = "Gene set") +
# Label outliers
geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 4), size = 3, color = "blue") +
facet_grid(. ~ src, scale = "free") +
theme(legend.position = "bottom")
}
sce_dummy.de$table %>% show_de_volcano()

show_de_basecamp <- function(.) {
(.) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
group_by(goi_set) %>%
arrange(lfc) %>%
mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
mutate(is_out = is_out | (min(tail(sort(base), 5)) <= base)) %>%
ungroup() %>%
mutate(label = ifelse(is_out, symbol, "")) %>%
ggplot(aes(x = lfc, y = base)) +
geom_point(data = (.), alpha = 0.5, size = 1, color = "grey") +
geom_point(color = "black", size = 0.5) +
scale_y_log10() +
geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 0.01), size = 2, color = "blue") +
facet_wrap(~goi_set, scale = "free_y") +
# Note: put labs(x = "..") outside
labs(y = "Expression") +
theme(
legend.position = "bottom",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
)
}
sce_dummy.de$table %>%
show_de_basecamp() +
labs(x = "Condition1 <-- LFC --> Condition2")

show_de_cmp_lfc <- function(table) {
merge(
table %>% filter(src == "DESeq2"),
table %>% filter(src == "edgeR"),
by = "symbol",
all = TRUE
) %>%
# tidyr::replace_na(list(p.x = 0, p.y = 0)) %>%
tidyr::drop_na() %>%
mutate(sig.x = (p.x < quantile(p.x, 0.01))) %>%
mutate(sig.y = (p.y < quantile(p.y, 0.01))) %>%
mutate(sig = if_else(sig.x & sig.y, "Intersection", if_else(sig.x, "DESeq", if_else(sig.y, "edgeR", "")))) %>%
mutate(label = if_else(sig.x | sig.y, symbol, "")) %>%
{
ggplot((.) %>% filter(sig != ""), aes(x = lfc.x, y = lfc.y)) +
geom_point(data = (.) %>% filter(sig == ""), stroke = 0, alpha = 0.2) +
geom_point(aes(color = sig)) +
labs(x = "LFC by DESeq2", y = "LFC by edgeR", color = "Top %1 signif.") +
geom_text(aes(label = label, color = sig), hjust = -0.3, position = position_jitter(h = 0.05), size = 3, alpha = 0.8, show.legend = FALSE) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed")
}
}
sce_dummy.de$table %>% show_de_cmp_lfc()

compute_go <- function(de_table, n = 33) {
de_table %>%
mutate(direction = if_else(lfc > 0, "up", "dn")) %>%
group_by(direction) %>%
slice_min(p, n = n, with_ties = TRUE) %>%
slice_max(abs(lfc), n = n, with_ties = FALSE) %>%
split(.$direction) %>%
within({
f <- (function(x) x$symbol[1:5] %>% paste(collapse = ", "))
message(paste("Up:", f(up), "..."))
message(paste("Dn:", f(dn), "..."))
rm(f)
}) %>%
lapply(function(de_table) {
de_table %>%
# Get the Entrez IDs of our genes
mutate(gene_id = symbol2geneid(symbol)) %>%
pull(gene_id) %>%
# GO enrichment analysis
limma::goana(species = "Mm") %>%
ind2col("category") %>%
rename(p = P.DE, `#DE` = DE, `#cat` = N, GO = Ont, term = Term) %>%
# Limit selection
filter(p <= 0.05) %>%
arrange(p)
})
}
show_go_table <- function(.) {
(.) %>%
with({
rbind(
up %>% mutate(direction = "lfc_up"),
dn %>% mutate(direction = "lfc_down")
) %>%
arrange(p)
}) %>%
mutate(category = if_else(grepl("^GO:", category), as.link(category), category)) %>%
mutate(category = paste0("<span style='white-space:nowrap;'>", category, "(", GO, ")", "</span>")) %>%
mutate(direction = if_else(grepl("lfc_u", direction), paste0("<span style='color:darkred'>", direction, "</span>"), direction)) %>%
mutate(direction = if_else(grepl("lfc_d", direction), paste0("<span style='color:darkgreen'>", direction, "</span>"), direction)) %>%
dplyr::select(any_of(c("category", "direction", "p", "#DE", "#cat", "term"))) %>%
DT::datatable(escape = FALSE, width = "100%") %>%
DT::formatSignif("p", digits = 3)
}
sce_dummy.go <-
sce_dummy.de$table %>%
compute_go()
## Up: Col4a2, Col4a1, Ier3, Col18a1, Col8a1 ...
## Dn: Col9a2, Pdhb, Col9a2, Ddit4, Col4a6 ...
rbind(
sce_dummy.go$up %>% head(2),
sce_dummy.go$dn %>% head(2),
"..."
) %>%
DT::datatable(escape = FALSE, width = "100%")
sce_dummy.go %>% show_go_table()
# If A and B are SingleCellExperiment-like, then `cosine` computes
# the cosine similarity of samples of A against samples of B
cosine <- function(A, B) {
A <- A@assays@data$counts
B <- B@assays@data$counts
ii <- intersect(rownames(A), rownames(B))
A <- t(as.matrix(A[ii, , drop = FALSE]))
B <- t(as.matrix(B[ii, , drop = FALSE]))
A <- A / sqrt(rowSums(A * A))
B <- B / sqrt(rowSums(B * B))
A %*% t(B)
}
Example:
cosine(
sce_dummy[, sce_dummy$subgroup == "healthy1"],
sce_dummy[, sce_dummy$subgroup == "healthy1"]
) %>%
pheatmap::pheatmap()

coex <- function(sce) {
# Filter for the correlation matrix
f <- function(.) ((.) != 0)
sce %>%
SingleCellExperiment::counts() %>%
t() %>%
# stats::cor(method = "spearman") %>%
Hmisc::rcorr(type = "spearman") %>%
`$`(r) %>%
{
with(
(upper.tri(.) * (.)) %>% f() %>% which(arr.ind = TRUE) %>% as.data.frame(),
data.frame(a = rownames(.)[row], b = colnames(.)[col], w = (.)[cbind(row, col)])
)
}
}
show_coex_graph <- function(coex_df, de_table, n = 77) {
coex_df %<>% as.data.frame()
# Note: the graph vertices are pulled from the de_table
# Only those occurring in coex_df$a or .$b are retained
stopifnot(!any(duplicated(de_table$symbol)))
# Color of vertex labels
# de_table %>% pull(color, symbol)
stopifnot(all(c("a", "b", "w") %in% colnames(coex_df)))
ex_graph <-
coex_df %>%
group_by(w > 0) %>%
mutate(color = ((1 + w / max(abs(w))) / 2)) %>%
slice_max(abs(w), n = n) %>%
ungroup() %>%
mutate(color = colorRamp(c("blue", "white", "red"))(color) %>% RGB(alpha = 0.2)) %>%
select(a, b, w, color) %>%
igraph::graph_from_data_frame(
directed = FALSE,
vertices = (
de_table %>%
filter((symbol %in% coex_df$a) | (symbol %in% coex_df$b)) %>%
select(symbol, color)
)
)
# Graphs based on OmniPath
graphs <- list(
tf = omnipath.tf,
lr = omnipath.lr,
ia = omnipath.in %>%
anti_join(omnipath.tf, by = c("source", "target")) %>%
anti_join(omnipath.lr, by = c("source", "target"))
) %>%
lapply(function(df) {
df %>%
mutate(sign = is_stimulation - is_inhibition) %>%
select(a = source_genesymbol, b = target_genesymbol, sign) %>%
filter(a %in% igraph::V(ex_graph)$name) %>%
filter(b %in% igraph::V(ex_graph)$name) %>%
# group_by(a, b) %>% summarize(sign = sum(sign)) %>%
filter(sign != 0) %>%
igraph::graph_from_data_frame(directed = TRUE)
}) %>%
within({
ex <- ex_graph
})
rm(ex_graph)
# for layouting
set.seed(43)
graph_layout <-
(
graphs$ex
# + igraph::as.undirected(graphs$tf)
# + igraph::as.undirected(graphs$lr)
+ igraph::as.undirected(graphs$ia)
) %>%
# igraph::layout_with_kk(dim = 2, kkconst = igraph::vcount(.)) %>%
igraph::layout_with_dh(maxiter = 33) %>%
# igraph::layout_with_lgl() %>%
# igraph::layout_with_fr(start.temp = 1e-1) %>%
as.data.frame() %>%
magrittr::set_colnames(c("x", "y")) %>%
magrittr::set_rownames(igraph::V(graphs$ex)$name)
graphs %>%
lapply(function(graph) {
list(graph = graph, pos = graph_layout) %>%
within({
# Edges as dataframe
E <-
graph %>%
igraph::get.edgelist() %>%
magrittr::set_colnames(c("a", "b")) %>%
as.data.frame()
# Attach segment data
segment <-
cbind(
take_rows(pos, E$a) %>% magrittr::set_colnames(c("x", "y")),
take_rows(pos, E$b) %>% magrittr::set_colnames(c("xend", "yend"))
) %>%
mutate(
x = x + (xend - x) * 0.03, xend = xend - (xend - x) * 0.1,
y = y + (yend - y) * 0.03, yend = yend - (yend - y) * 0.1
)
})
}) %>%
with({
list(
slide1 = list(
alpha = list(ex = 2e-2, tf = 0.20, lr = 3e-2, ia = 3e-2),
title = "TF -> target"
)
, slide2 = list(
alpha = list(ex = 2e-2, tf = 3e-2, lr = 0.20, ia = 3e-2),
title = "Ligand -> receptor"
)
, slide3 = list(
alpha = list(ex = 2e-2, tf = 3e-2, lr = 3e-2, ia = 0.20),
title = "More protein-protein interaction"
)
, slide4 = list(
alpha = list(ex = 0.60, tf = 3e-2, lr = 3e-2, ia = 3e-2),
title = "Co-expression"
)
) %>% lapply(
function(slide) {
ggplot() +
# Co-expression segments
geom_segment(
data = ex$segment,
aes(x = x, y = y, xend = xend, yend = yend),
color = igraph::E(ex$graph)$color,
alpha = slide$alpha$ex,
size = 0.1,
linetype = "solid"
) +
# TF-target curves
geom_curve(
data = tf$segment,
lineend = "butt", # linejoin = "mitre",
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.005, "npc")),
color = if_else(igraph::E(tf$graph)$sign > 0, "red", "blue"),
alpha = slide$alpha$tf,
size = 0.2,
curvature = 0.1,
linetype = "solid"
) +
# Ligand-receptor curves
geom_curve(
data = lr$segment,
lineend = "butt", # linejoin = "mitre",
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.005, "npc")),
color = if_else(igraph::E(lr$graph)$sign > 0, "red", "blue"),
alpha = slide$alpha$lr,
size = 0.2,
curvature = 0.1,
linetype = "solid"
) +
# PPInteraction curves
geom_curve(
data = ia$segment,
lineend = "butt", # linejoin = "mitre",
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.005, "npc")),
color = if_else(igraph::E(ia$graph)$sign > 0, "red", "blue"),
alpha = slide$alpha$ia,
size = 0.2,
curvature = 0.1,
linetype = "solid"
) +
# Labels
geom_text(
data = ex$pos,
aes(x = x, y = y, label = igraph::V(ex$graph)$name),
size = 2,
color = igraph::V(ex$graph)$color
) +
theme_void() +
ggplot2::ggtitle(slide$title) +
theme(plot.title = element_text(hjust = 0.95, vjust = -3, size = 10))
}
)
})
# graph %>%
# igraph::plot.igraph(
# edge.width = 0.7,
# edge.color = igraph::E(.)$color,
#
# vertex.size = 0,
# vertex.shape = 'none',
# #vertex.color = "black",
# vertex.label.cex = 0.4,
# vertex.label.color = vcolor[igraph::V(.)$name],
#
# layout = graph_layout
# )
}
sce_dummy %>%
`[`(, (.)$group == "healthy") %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(sce_dummy.de$table %>% filter(src == "edgeR"))




requireNamespace("visNetwork")
## Loading required namespace: visNetwork
omnipath.in %>%
mutate(a = source_genesymbol, b = target_genesymbol) %>%
filter(a %in% get_goi_genes(), b %in% get_goi_genes()) %>%
select(a, b) %>%
igraph::graph_from_data_frame(directed = FALSE) %>%
visNetwork::visIgraph(layout = 'layout_with_kk')
# scenic_options <-
# SCENIC::initializeScenic(org = "mgi", dbDir = path_to("data/SCENIC/*cache"), dbs = SCENIC::defaultDbNames[["mgi"]], datasetTitle = "dummy", nCores = 1)
# sce_dummy@assays@data$counts %>%
# list(counts = .) %>% with({
# counts %>%
# `[`(SCENIC::geneFiltering(., scenicOptions = scenic_options), ) %>%
# #SCENIC::runCorrelation(scenic_options) %>%
# log1p() %>%
# SCENIC::runGenie3(scenic_options)
# })
#
#
# `%dopar%` <- foreach::`%dopar%`
# foreach <- foreach::foreach
# scenic_options %>%
# SCENIC::runSCENIC_1_coexNetwork2modules() %>%
# SCENIC::runSCENIC_2_createRegulons() %>%
# SCENIC::runSCENIC_3_scoreCells(log1p(sce_dummy@assays@data$counts))
Remove unused.
rm(cco_revelio)
gene.summary$hs <- NULL
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9279820 495.6 15718238 839.5 15718238 839.5
## Vcells 24001056 183.2 42555924 324.7 35396594 270.1
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| omnipath.tf | 66.00 | 160.0 | Mb |
| omnipath.in | 41.00 | 92.0 | Mb |
| gene.summary | 39.00 | 50.0 | Mb |
| omnipath.lr | 4.10 | 12.0 | Mb |
| show_coex_graph | 1.50 | 7.7 | Mb |
| sce_dummy | 0.52 | 6.2 | Mb |
| sce_dummy.go | 0.48 | 5.7 | Mb |
| goi_agg_by_group | 0.45 | 5.2 | Mb |
| show_de_table | 0.27 | 4.7 | Mb |
| heatmap | 0.26 | 4.5 | Mb |
# Collection of dataset loaders
load_dataset <- list()
# Summarizes each column in `cols`
# Call with results='asis'
tables <- function(sce, cols) {
for (col in cols) {
sce@colData %>%
as.data.frame() %>%
pull(col) %>%
table(useNA = "ifany") %>%
as.data.frame() %>%
rename_with(~col, ".") %>%
rename_with(~"#", Freq) %>%
t() %>%
kable() %>%
print()
# https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#generate-multiple-tables-from-a-for-loop
# https://stackoverflow.com/questions/52631689/how-to-show-formatted-r-output-with-results-asis-in-rmarkdown
}
}
“Mouse Whole Cortex and Hippocampus 10x” dataset from Allen Brain, 2020: [explore], [download], [protocols].
The dataset contains ~1M cells. We have selected ~31k cells of type “Astro”, “Endo” and “VLMC”.
load_dataset$ab <- function(max.size = 9999) {
sce <-
make_sce(
counts = (
path_to("*/AllenBrain/Mouse-WCH*/*fewer_cells/data.*.gz") %>%
from_tsv() %>%
`[`(, sort(colnames(.)))
),
coldata = (
path_to("*/AllenBrain/Mouse-WCH*/f*cells/meta.*.gz") %>%
from_tsv() %>%
`[`(sort(rownames(.)), )
)
) %>%
# Only keep VLMC
`[`(, (.)$subclass_label == "VLMC") %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type: sometimes convenient below, esp. for plotting
`[`(, order((.)$cell_type_alias_label))
# conversion to matrix takes long, hence do it here
sce@assays@data$counts %<>% as.matrix()
# alias `celltype` column
sce@colData$celltype <- sce@colData$cell_type_alias_label
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "375_VLMC")
return(sce)
}
GSE98816: single cell RNA-seq of mouse brain vascular transcriptomes.
load_dataset$bh <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/Betsholtz-2018/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/Betsholtz-2018/*/meta.*.gz") %>% from_tsv()
) %>%
# Only keep FB
`[`(, (.)$celltype %in% c("FB1", "FB2")) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type
`[`(, order((.)$celltype))
sce@assays@data$counts %<>% as.matrix()
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "FB1")
return(sce)
}
Raw data from GSE113973 (last update: Nov 13, 2019), metadata from UCSC cell browser.
load_dataset$cb <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/CasteloBranco-2018/*/expr.*.gz") %>%
from_tsv() %>%
{
# TODO: 1810011O10Rik -> Tcim
# https://www.genecards.org/cgi-bin/carddisp.pl?gene=TCIM
(.)
} %>%
t(),
coldata = path_to("*/CasteloBranco-2018/*/meta.*.gz") %>%
from_tsv()
) %>%
# Only keep those cells
`[`(, (.)$celltype %in% c("VLMC1", "VLMC2", "VLMC3", "VLMC4")) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type
`[`(, order((.)$celltype))
sce@assays@data$counts %<>% as.matrix()
sce@colData$Group %<>% as.factor() %>% relevel(ref = "Ctrl")
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "VLMC3")
return(sce)
}
Expression data from GSE135186 (last update: Feb 10, 2021). The cell-to-cluster assignment is a private communication by D. Aran (2021-05-20). The associated paper is Dorrier-Aran-…-Daneman, 2021.
load_dataset$dm <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/Daneman-2020/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/Daneman-2020/*/meta.*.gz") %>%
from_tsv()
) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells (there shouldn't be any here)
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order samples
`[`(, order((.)$cluster))
# Drop cells without cluster (they separate in dim. red. plots)
sce %<>% `[`(, !is.na((.)$cluster))
sce@assays@data$counts %<>% as.matrix()
# Set factor reference level
sce@colData$group %<>% as.factor() %>% relevel(ref = "healthy")
sce@colData$cluster %<>% c() %>%
as.factor() %>%
relevel(ref = "0")
# Alias for consistency
sce@colData$celltype <- sce@colData$cluster
return(sce)
}
datasets <- load_dataset %>% lapply(\(loader) loader(max.size = 777))
datasets <- datasets %>% lapply(scater_attach)
# memo: `%<>%` doesn't seem to work with cache=TRUE





Remove unused.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9293144 496.4 15718238 839.5 15718238 839.5
## Vcells 79247077 604.7 199453428 1521.8 152788162 1165.7
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 460.00 | 620.0 | Mb |
| omnipath.tf | 66.00 | 160.0 | Mb |
| omnipath.in | 41.00 | 92.0 | Mb |
| gene.summary | 39.00 | 50.0 | Mb |
| omnipath.lr | 4.10 | 12.0 | Mb |
| show_coex_graph | 1.50 | 7.8 | Mb |
| sce_dummy | 0.52 | 6.3 | Mb |
| sce_dummy.go | 0.48 | 5.8 | Mb |
| goi_agg_by_group | 0.45 | 5.3 | Mb |
| show_de_table | 0.27 | 4.8 | Mb |
The following is an approximation of Extended Data Fig 4c from Dorrier-Aran-…-Daneman, 2021.
We check whether other datasets (besides “Daneman”) cluster by their cell types using the same set of genes.
The samples are presorted by “Daneman” cluster and by condition within each cluster.
datasets$dm %>%
{
# Remove the `celltype` columns (same as `cluster`)
.@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
# Order samples
.[, order(paste(.$cluster, .$subgroup))]
} %>%
show_dm_heatmap(colors = colors, cluster_cols = FALSE)

The samples are clustered.
datasets$ab %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

The samples are clustered.
datasets$bh %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

The samples are clustered.
datasets$cb %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

cco <- list(
ab = datasets$ab %>%
cell_cycle(batch.by = "celltype"),
bh = datasets$bh %>%
cell_cycle(batch.by = "celltype"),
cb = datasets$cb %>%
cell_cycle(batch.by = "celltype"),
dm = datasets$dm %>%
`[`(, (.)$subgroup %in% c("healthy1", "healthy2")) %>%
cell_cycle(batch.by = "subgroup")
)
## 2021-06-28 23:19:49: calculating optimal rotation: 2021-06-28 23:19:49: calculating PCA: 2021-06-28 23:19:49: assigning cell cycle phases: 2021-06-28 23:19:49: reading data: 0.33secs
## 1.8secs
## 3.25secs
## 18.91secs
## 2021-06-28 23:20:08: calculating optimal rotation: 2021-06-28 23:20:08: calculating PCA: 2021-06-28 23:20:08: assigning cell cycle phases: 2021-06-28 23:20:08: reading data: 0.14secs
## 1.88secs
## 2.17mins
## 2.49mins
## 2021-06-28 23:22:38: calculating optimal rotation: 2021-06-28 23:22:38: calculating PCA: 2021-06-28 23:22:38: assigning cell cycle phases: 2021-06-28 23:22:38: reading data: 0.22secs
## 1.7secs
## 2.46mins
## 2.64mins
## 2021-06-28 23:25:16: calculating optimal rotation: 2021-06-28 23:25:16: calculating PCA: 2021-06-28 23:25:16: assigning cell cycle phases: 2021-06-28 23:25:16: reading data: 1.58secs
## 4.78secs
## 6.24secs
## 15.43secs
cco$ab %>% cell_cycle_plot2d(colors = colors)

cco$bh %>% cell_cycle_plot2d(colors = colors)

cco$cb %>% cell_cycle_plot2d(colors = colors)

cco$dm %>% cell_cycle_plot2d(colors = colors)

datasets$ab %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$bh %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "Group", colors = colors)

datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "subgroup", colors = colors)

datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "cluster")

pairs.to.contrast <-
list(
list(A = goi$col$name, B = goi$air$name),
list(A = goi$glycolysis$name, B = goi$air$name)
)
show_ab_density <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
stopifnot(
all(sce@colData[[group.by]] %in% names(colors))
)
sce %<>%
scater::aggregateAcrossFeatures(
ids = symbol2goiset(rownames(.)),
average = TRUE,
use.assay.type = "logcounts"
)
pairs.to.contrast %>%
lapply(dual(function(A, B) {
data.frame(
a = sce@assays@data$logcounts[A, ],
b = sce@assays@data$logcounts[B, ],
g = sce@colData[[group.by]]
) %>% {
(.) %>%
ggplot(aes(x = (a - b), color = g)) +
labs(x = paste0(A, " - ", B)) +
labs(y = "Fraction of cells (i.e., density)") +
labs(color = group.by) +
scale_color_manual(values = colors, breaks = gglast(colour)) +
ggtitle(paste0(A, " vs ", B)) +
geom_density(size = 3)
}
}))
}
show_ab_2d <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
stopifnot(
all(sce@colData[[group.by]] %in% names(colors))
)
# Group -- for color
g <- sce@colData[[group.by]]
# Library size -- for size
s <- sce@assays@data$counts %>% colSums()
sce %<>%
scater::aggregateAcrossFeatures(
ids = symbol2goiset(rownames(.)),
average = TRUE,
use.assay.type = "logcounts"
)
pairs.to.contrast %>%
lapply(dual(function(A, B) {
data.frame(
x = sce@assays@data$logcounts[A, ],
y = sce@assays@data$logcounts[B, ],
g = g,
s = s
) %>% {
(.) %>%
ggplot(aes(x = x, y = y, color = g, size = s)) +
labs(x = A, y = B, color = group.by, size = "Library size") +
scale_color_manual(values = colors, breaks = gglast(colour)) +
geom_point(size = 5, alpha = 0.6) +
ggtitle(paste0(A, " vs ", B)) +
guides(shape = FALSE) +
geom_density_2d(size = 0.5) +
guides(color = guide_legend(override.aes = list(alpha = 1)))
}
}))
}
datasets$ab %>% show_ab_density(group.by = "celltype", colors = colors)


datasets$ab %>% show_ab_2d(group.by = "celltype", colors = colors)
## now dyn.load("/usr/lib/R/library/MASS/libs/MASS.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/isoband/libs/isoband.so") ...


datasets$bh %>% show_ab_density(group.by = "celltype", colors = colors)


datasets$bh %>% show_ab_2d(group.by = "celltype", colors = colors)


datasets$cb %>% show_ab_density(group.by = "celltype", colors = colors)


datasets$cb %>% show_ab_2d(group.by = "celltype", colors = colors)


datasets$dm %>% show_ab_density(group.by = "cluster")


datasets$dm %>% show_ab_2d(group.by = "cluster")


datasets$dm %>% show_ab_density(group.by = "subgroup", colors = colors)


datasets$dm %>% show_ab_2d(group.by = "subgroup", colors = colors)


de <- list(
# Other/375_VLMC
ab_other_vs_375 = datasets$ab %>% compute_de(~celltype),
# FB2/FB1:
bh_fb2_vs_fb1 = datasets$bh %>% compute_de(~celltype),
# EAE/Ctrl (exclude the VLMC3 cells because they separate on PCA):
cb_eae_vs_ctrl = datasets$cb %>% `[`(, (.)$celltype != "VLMC3") %>% compute_de(~Group),
# Other/VLMC3:
cb_other_vs_vlmc3 = datasets$cb %>% compute_de(~celltype),
# EAE/Healthy:
dm_eae_vs_hthy = datasets$dm %>% compute_de(~group)
)
de$ab_other_vs_375$table %>% show_de_table()
datasets$ab %>%
show_de_heatmap(
de$ab_other_vs_375$table %>% filter(src == "edgeR"),
colors = colors
)

de$ab_other_vs_375$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in 375_VLMC <-- LFC --> Up in Other")

de$ab_other_vs_375$table %>%
show_de_cmp_lfc()

de$ab_other_vs_375$table %>%
show_de_volcano() +
labs(x = "Up in 375_VLMC <-- LFC --> Up in Other", color = "Geneset")

de$bh_fb2_vs_fb1$table %>% show_de_table()
datasets$bh %>%
show_de_heatmap(
de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"),
colors = colors
)

de$bh_fb2_vs_fb1$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in FB1 <-- LFC --> Up in FB2")

de$bh_fb2_vs_fb1$table %>%
show_de_cmp_lfc()

de$bh_fb2_vs_fb1$table %>%
show_de_volcano() +
labs(x = "Up in FB1 <-- LFC --> Up in FB2", color = "Geneset")

de$cb_other_vs_vlmc3$table %>% show_de_table()
datasets$cb %>%
show_de_heatmap(
de$cb_other_vs_vlmc3$table %>% filter(src == "edgeR"),
colors = colors
)

de$cb_other_vs_vlmc3$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in VLMC3 <-- LFC --> Up in Other")

de$cb_other_vs_vlmc3$table %>%
show_de_cmp_lfc()

de$cb_other_vs_vlmc3$table %>%
show_de_volcano() +
labs(x = "Up in VLMC3 <-- LFC --> Up in Other", color = "Geneset")

de$cb_eae_vs_ctrl$table %>% show_de_table()
datasets$cb %>%
show_de_heatmap(
de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"),
colors = colors
)

de$cb_eae_vs_ctrl$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in Ctrl <-- LFC --> Up in EAE")

de$cb_eae_vs_ctrl$table %>%
show_de_cmp_lfc()

de$cb_eae_vs_ctrl$table %>%
show_de_volcano() +
labs(x = "Up in Ctrl <-- LFC --> Up in EAE", color = "Geneset")

de$dm_eae_vs_hthy$table %>% show_de_table()
datasets$dm %>%
{
# Remove the `celltype` columns (same as `cluster`)
.@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
# Order samples
.[, order(.$subgroup)]
} %>%
show_de_heatmap(
de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"),
cluster_cols = FALSE,
colors = colors
)

de$dm_eae_vs_hthy$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Healthy <-- LFC --> EAE")

de$dm_eae_vs_hthy$table %>%
show_de_cmp_lfc()

de$dm_eae_vs_hthy$table %>%
show_de_volcano() +
labs(x = "Healthy <-- LFC --> EAE", color = "Geneset")

go <-
list(A = 33, B = 77, C = 144) %>% lapply(function(n) {
de %>% lapply(function(de_result) {
de_result$table %>% compute_go(n = n)
})
})
go$A$ab_other_vs_375 %>% show_go_table()
go$A$bh_fb2_vs_fb1 %>% show_go_table()
go$A$cb_other_vs_vlmc3 %>% show_go_table()
go$A$cb_eae_vs_ctrl %>% show_go_table()
go$A$dm_eae_vs_hthy %>% show_go_table()
The straight edges are the top negative (blue) and positive (red) gene-gene Spearman correlations among the “genes of interest” within each sub-dataset. The nodes are colored by condition.
The arched edges are TF-target, ligand-receptor, protein-protein interactions from OmniPath.
Click on the figure to change the view; shift-click to enlarge.
datasets$ab %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "374_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "375_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "376_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "FB1") %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "FB2") %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
`[`(, .$Group == "Ctrl") %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
`[`(, .$Group == "EAE") %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup %in% c("healthy1", "healthy2")) %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "healthy3") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "EAE1") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "EAE2") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




Clean up.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9337612 498.7 15718238 839.5 15718238 839.5
## Vcells 140759856 1074.0 239641512 1828.4 239641512 1828.4
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 460.00 | 640.0 | Mb |
| omnipath.tf | 66.00 | 180.0 | Mb |
| omnipath.in | 41.00 | 120.0 | Mb |
| gene.summary | 39.00 | 74.0 | Mb |
| de | 17.00 | 36.0 | Mb |
| go | 5.60 | 19.0 | Mb |
| omnipath.lr | 4.10 | 13.0 | Mb |
| show_coex_graph | 1.50 | 9.0 | Mb |
| sce_dummy | 0.52 | 7.5 | Mb |
| sce_dummy.go | 0.48 | 7.0 | Mb |
de_tables <- list(
a = de$ab_other_vs_375$table %>% mutate(name = "AB: Other/375_VLMC"),
b = de$bh_fb2_vs_fb1$table %>% mutate(name = "Bh: FB2/FB1"),
c1 = de$cb_other_vs_vlmc3$table %>% mutate(name = "CB: Other/VLMC3"),
c2 = de$cb_eae_vs_ctrl$table %>% mutate(name = "CB': EAE/Ctrl"),
d = de$dm_eae_vs_hthy$table %>% mutate(name = "Dm: EAE/Healthy")
)
de_tables %>%
dual(rbind) %>%
slice_sample(n = 7)
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ps/libs/ps.so") ...
## # A tibble: 7 x 7
## symbol base lfc p src color name
## * <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 Rcor3 0.103 -0.217 0.999 DESeq2 #4A940CFF AB: Other/375_VLMC
## 2 Ascc1 6.25 0.0726 1 edgeR #BF733AFF CB': EAE/Ctrl
## 3 Sri 7.60 -0.599 1 edgeR #698411FF CB': EAE/Ctrl
## 4 C130079G13Rik 2.41 0 1 edgeR #7E801BFF CB': EAE/Ctrl
## 5 Sf3b1 5.10 0.875 0.931 DESeq2 #1824FBFF Bh: FB2/FB1
## 6 Pa2g4 3.32 -0.768 1 DESeq2 #BD6A3BFF CB: Other/VLMC3
## 7 Gpr135 0.0305 0.420 0.999 DESeq2 #61C247FF AB: Other/375_VLMC
datasets$combo <-
list(
ab = data.frame(
datasets$ab@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "AB",
subgroup = datasets$ab$celltype
),
bh = data.frame(
datasets$bh@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "Bh",
subgroup = datasets$bh$celltype
),
cb = data.frame(
datasets$cb@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "CB",
subgroup = datasets$cb$Group
),
dm = data.frame(
datasets$dm@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "Dm",
subgroup = datasets$dm$subgroup
) %>% {
(.)[sample(rownames(.), size = min(nrow(.), 777)), ]
}
) %>%
lapply(function(ds) {
ds[, Reduce(intersect, lapply(., colnames))]
}) %>%
dual(rbind) %>%
{
make_sce(
(.) %>% select(-group, -subgroup) %>% t(),
(.) %>% select(group, subgroup)
)
}
print(datasets$combo)
## class: SingleCellExperiment
## dim: 301 1002
## metadata(0):
## assays(1): counts
## rownames(301): Fos Fosb ... Tpm2 Actb
## rowData names(0):
## colnames(1002): ab.TGTGGTAGTACCGGCT-L8TX_180406_01_E06
## ab.GACCAATTCCTGCCAT-L8TX_180926_01_E01 ... dm.EAE1_AAGTCTGAGTGACTCT
## dm.healthy1_CAGGTGCGTTTGGCGC
## colData names(2): group subgroup
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
Compare the gene LFC (edgeR) across datasets. Each dot is a gene with x-coordinate as the LFC in one dataset and y-coordinate as the LFC in another.
de_tables %>%
lapply(function(t) {
t %>%
group_by(name, src) %>%
mutate(lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
ungroup() %>%
filter(src == "edgeR")
}) %>%
with({
rbind(
merge(a, b, by = "symbol"),
merge(a, c1, by = "symbol"),
merge(a, c2, by = "symbol"),
merge(a, d, by = "symbol"),
merge(b, c1, by = "symbol"),
merge(b, c2, by = "symbol"),
merge(b, d, by = "symbol"),
merge(c1, d, by = "symbol"),
merge(c2, d, by = "symbol")
)
}) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
{
ggplot(., aes(x = lfc.x, y = lfc.y)) +
# Background
geom_point(alpha = 0.05, color = "black", size = 0.7) +
geom_smooth(formula = y ~ x, method = "lm", size = 0.2, color = "gray") +
# Foreground
geom_point(
data = filter(., goi_set != "Other"),
aes(color = goi_set),
stroke = 0, size = 1, alpha = 0.7
) +
# geom_smooth(data = filter(., goi_set != "Other"), formula = y ~ x, method = 'lm', size = 0.2) +
labs(x = "LFC", y = "LFC", color = "") +
# scale_color_brewer(palette = "Set1") +
facet_grid(
cols = vars(name.x), rows = vars(name.y),
scale = "free"
) +
theme(
legend.position = "bottom",
axis.text = element_blank(),
axis.ticks = element_blank(),
strip.text = element_text(size = 10),
axis.title = element_blank()
) +
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
}

The following table shows the top genes that are DE (one way or another according to edgeR) simultaneously in several datasets.
make_common_de_table <- function(de_tables, slicer = slice_max) {
de_tables %>%
lapply(function(t) {
t %>%
group_by(name, src) %>%
mutate(norm.lfc = ((lfc - mean(lfc)) / sd(lfc))) %>%
slicer(abs(norm.lfc), n = 777) %>%
ungroup() %>%
filter(src == "edgeR")
}) %>%
dual(rbind) %>%
as.data.frame() %>%
{
# print(head(.))
.
} %>%
select(symbol, norm.lfc, name) %>%
stats::reshape(direction = "wide", idvar = "symbol", timevar = "name") %>%
# Commonly-DE first
arrange(desc(rowSums(!is.na(across(where(is.numeric)))))) %>%
# Signature of NAs
rowwise() %>%
mutate(signature = paste(across(where(is.numeric), ~ ifelse(is.na(.x), "N", "Y")), collapse = "")) %>%
ungroup() %>%
# Role: TF, Rec, Lig, etc
mutate(role = symbol2role(symbol)) %>%
# Reset rownames
as.data.frame(row.names = 1:nrow(.)) %>%
# Formatting
mutate(symbol = as.link(symbol)) %>%
mutate(across(where(is.numeric), signif, 3)) %>%
DT::datatable(escape = FALSE, rownames = TRUE)
}
de_tables %>% make_common_de_table(slicer = slice_max)
datasets$grouped <-
list(
list(ds = datasets$ab, name = "AB", group.by = "celltype"),
list(ds = datasets$bh, name = "Bh", group.by = "celltype"),
list(ds = datasets$cb, name = "CB", group.by = "celltype"),
list(ds = datasets$cb, name = "CB", group.by = "Group"),
list(ds = datasets$dm, name = "Dm", group.by = "cluster"),
list(ds = datasets$dm, name = "Dm", group.by = "subgroup")
) %>%
lapply(dual(
function(ds, name, group.by) {
ds %>%
# Subset genes
take_rows(
unique(goi %>% lapply(as.data.frame) %>% dual(rbind) %>% pull(genes))
) %>%
scuttle::aggregateAcrossCells(ids = ds[[group.by]], use.assay.type = "logcounts", statistics = "mean") %>%
SingleCellExperiment::logcounts() %>%
as.data.frame() %>%
ind2col("symbol") %>%
reshape2::melt(id.vars = "symbol", variable.name = "subgroup", value.name = "expression") %>%
mutate(dataset = name, subdataset = paste0(name, " (", group.by, ")"), group = group.by) %>%
mutate(role = symbol2role(symbol))
# TODO: add description?
}
)) %>%
dual(rbind)
interesting <- c(
# Daneman clusters
c("Rbp1", "Fn1", "Igfbp7"),
# Aerobic
c("Cox3b", "Cox7c", "Bloc1s1"),
# Anaerobic glycolysis
c("Ier3"),
# Collagen
c("Col4a1", "Col4a2", "Col8a1", "Col3a1"),
# Fib. migration
c("Sdc4")
)
show_stack <- function(ds) {
ds$role
ds %>%
mutate(role = if_else(role == "", "Unknown", role)) %>%
group_by(subdataset) %>%
mutate(expression = 100 * expression / max(expression)) %>%
ungroup() %>%
mutate(
symbol = if_else(symbol %in% interesting, glue("<span style='color:red;'>{symbol}</span>"), symbol)
) %>%
mutate(color = c(ramp(0:7), colors)[as.character(subgroup)]) %>%
mutate(subgroup = glue("<span name='{subgroup}' style='color:{color};'>{subgroup}</span>")) %>%
# Append role to symbol
# mutate(symbol = paste0(symbol, " (", role, ")")) %>%
# Heatmaps
ggplot(aes(x = subgroup, y = symbol, fill = expression)) +
geom_tile() +
scale_y_discrete(limits = rev) +
scale_fill_gradient(low = "black", high = "yellow") +
labs(fill = "%") +
theme(
axis.text.x = ggtext::element_markdown(
size = 12, angle = 90, vjust = 0.5, hjust = 1
),
axis.title = element_blank(),
legend.title = element_text(size = 9),
legend.text = element_text(size = 9),
# Remove the `role` strip altogether
# strip.text.y = element_blank(),
strip.text.y = element_text(color = "black", angle = 0, size = 8),
strip.text.x = element_text(color = "black", size = 10),
) +
facet_grid(
cols = vars(subdataset),
rows = vars(role),
space = "free",
scales = "free",
labeller = label_wrap_gen(5)
) +
theme(axis.text.y = ggtext::element_markdown())
}
(0:7) %>%
lapply(function(i) {
datasets$grouped %>%
filter(symbol %in% goi[[paste0("c", i)]]$genes) %>%
show_stack() +
ggtitle(paste0("Cluster #", i)) +
theme(axis.text.y = ggtext::element_markdown(size = 12))
})








datasets$grouped %>%
filter(symbol %in% goi$neuroinflammation$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 12))

datasets$grouped %>%
filter(symbol %in% goi$col$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 9))

datasets$grouped %>%
filter(symbol %in% goi$air$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 7))

datasets$grouped %>%
filter(symbol %in% goi$glycolysis$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 6))

datasets$grouped %>%
filter(symbol %in% goi$fib_mig$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 10))

For some of the gene sets, the following tabs show reduced dimension plots (PCA/TSNE/UMAP) once the combined dataset is restricted to that gene set.
colors # required below
## 374_VLMC 375_VLMC 376_VLMC FB1
## "aquamarine3" "chartreuse4" "chartreuse3" "cornflowerblue"
## FB2 Ctrl EAE VLMC1
## "blue" "chartreuse4" "coral2" "chartreuse1"
## VLMC3 VLMC4 healthy EAE1
## "coral3" "coral4" "green" "coral1"
## EAE2 healthy1 healthy2 healthy3
## "coral3" "aquamarine" "chartreuse3" "chartreuse4"
## positive negative G1.S S
## "Red4" "Steelblue1" "#E16A86" "#B88A00"
## G2 G2.M M.G1
## "#50A315" "#00AD9A" "#009ADE"
common_map <- function(sce, dimred = NULL) {
sce %<>% drop_empty()
if (is.null(dimred)) {
sce %<>% norm1() %>% scater_attach()
return(list(
PCA = common_map(sce, dimred = "PCA"),
TSNE = common_map(sce, dimred = "TSNE"),
UMAP = common_map(sce, dimred = "UMAP")
))
}
sce %>%
{
cbind(
.@colData %>% as.data.frame(),
list(
TSNE = .@int_colData$reducedDims$TSNE %>%
as.data.frame() %>%
select(x = V1, y = V2),
UMAP = .@int_colData$reducedDims$UMAP %>%
as.data.frame() %>%
select(x = V1, y = V2),
PCA = .@int_colData$reducedDims$PCA %>%
as.data.frame() %>%
select(x = PC1, y = PC2)
)[[dimred]]
)
} %>%
group_by(group) %>%
arrange(subgroup) %>%
mutate(rk = as.factor(dense_rank(subgroup))) %>%
ungroup() %>%
mutate(color = colors[as.character(subgroup)]) %>%
mutate(subgroup = paste0(group, ": ", subgroup)) %>%
{
ggplot((.), aes(x = x, y = y, color = subgroup)) +
geom_point(data = ((.) %>% select(-group)), alpha = 0.05, size = 1) +
geom_point(alpha = 0.5, size = 1.5) +
facet_wrap(. ~ group) +
scale_color_manual(values = (select(., color, subgroup) %>% pull(color, subgroup))) +
ggplot2::ggtitle(dimred) +
theme(
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
plot.title = element_text(),
) +
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
}
}
common_map_3d <- function(sce, color.by = "subgroup") {
sce %<>%
drop_empty() %>%
norm1() %>%
scater::logNormCounts() %>%
scater::runPCA()
data.frame(
color_group = sce@colData[[color.by]],
lib.size = sce@assays@data$counts %>% colSums(),
SingleCellExperiment::reducedDim(sce, "PCA") %>%
as.data.frame() %>%
select(PC1, PC2, PC3)
) %>%
group_by(color_group) %>%
sample(size = 33, replace = TRUE) %>%
ungroup() %>%
# plotly::plot_ly(type = "scatter3d", mode = "markers",
plotly::plot_ly() %>%
plotly::add_markers(
x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(size = 2, opacity = 1),
color = ~color_group, colors = colors
) %>%
plotly::layout(
scene = list(
xaxis = list(title = "PC1", showticklabels = FALSE),
yaxis = list(title = "PC2", showticklabels = FALSE),
zaxis = list(title = "PC3", showticklabels = FALSE)
)
)
}
datasets$combo %>% common_map()



datasets$combo %>% common_map_3d()
geneset <- goi$neuroinflammation$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$col$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$air$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$glycolysis$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$fib_mig$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
For each dataset we compare the most-DE genes to those from the kidney fibroblast activation gene set SI7 (see above). The LFCs are first standardized and filtered to top absolute LFC.
de_tables_vs_si7 <-
de_tables %>%
dual(rbind) %>%
group_by(name, src) %>%
mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
ungroup() %>%
merge(
y = fb_genes_si7 %>%
select(lfc = logFC) %>%
ind2col("symbol") %>%
mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
rename(norm.lfc.ref = norm.lfc),
by = "symbol",
all = TRUE
) %>%
mutate(delta = norm.lfc - norm.lfc.ref)
We only keep the LFC from edgeR.
de_tables_vs_si7 %>%
tidyr::drop_na() %>%
filter(src == "edgeR") %>%
group_by(name, src) %>%
mutate(is_out = FALSE) %>%
mutate(
is_out = is_out |
symbol %in% (slice_min(., norm.lfc, n = 5) %>% pull(symbol)) |
symbol %in% (slice_max(., norm.lfc, n = 5) %>% pull(symbol))
) %>%
mutate(
is_out = is_out |
symbol %in% (slice_min(., norm.lfc.ref, n = 5) %>% pull(symbol)) |
symbol %in% (slice_max(., norm.lfc.ref, n = 5) %>% pull(symbol))
) %>%
mutate(label = if_else(is_out, symbol, "")) %>%
ungroup() %>%
ggplot(aes(x = norm.lfc, y = norm.lfc.ref, color = name)) +
ggrepel::geom_text_repel(
aes(label = label),
hjust = 1.1, size = 3, show.legend = FALSE,
max.overlaps = Inf, point.size = NA,
segment.alpha = 0.2
) +
geom_point(size = 1, alpha = 0.3) +
geom_hline(yintercept = 0, alpha = 0.2) +
geom_vline(xintercept = 0, alpha = 0.2) +
labs(x = "Standardized LFC in Dataset", y = "Standardized LFC in Reference", color = "Dataset") +
scale_color_brewer(palette = "Set1") +
theme(
legend.title = element_text(size = 9),
legend.text = element_text(size = 9),
legend.position = "bottom"
) +
guides(color = guide_legend(override.aes = list(alpha = 1, size = 3)))

The genes that have a very different LFC (between two conditions within a dataset) from the set SI7 (see data sources) are the one with the most positive or most negative “delta”.
de_tables_vs_si7 %>%
tidyr::drop_na() %>%
select(symbol, dataset = name, src, norm.lfc, norm.lfc.ref, delta) %>%
mutate(symbol = as.link(symbol)) %>%
DT::datatable(escape = FALSE)
# Genes DE in EAE but not in AB dataset
Clean up.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9410844 502.6 15718238 839.5 15718238 839.5
## Vcells 145335838 1108.9 287649814 2194.6 287646791 2194.6
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 460.0 | 670.0 | Mb |
| omnipath.tf | 66.0 | 200.0 | Mb |
| omnipath.in | 41.0 | 140.0 | Mb |
| gene.summary | 39.0 | 95.0 | Mb |
| de_tables | 19.0 | 57.0 | Mb |
| de | 17.0 | 38.0 | Mb |
| go | 5.6 | 21.0 | Mb |
| omnipath.lr | 4.1 | 15.0 | Mb |
| show_coex_graph | 1.5 | 11.0 | Mb |
| de_tables_vs_si7 | 1.3 | 9.4 | Mb |